Merge branch 'akpm' (patches from Andrew)

Merge updates from Andrew Morton:

 - fsnotify fix

 - poll() timeout fix

 - a few scripts/ tweaks

 - debugobjects updates

 - the (small) ocfs2 queue

 - Minor fixes to kernel/padata.c

 - Maybe half of the MM queue

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
  mm, page_alloc: restore the original nodemask if the fast path allocation failed
  mm, page_alloc: uninline the bad page part of check_new_page()
  mm, page_alloc: don't duplicate code in free_pcp_prepare
  mm, page_alloc: defer debugging checks of pages allocated from the PCP
  mm, page_alloc: defer debugging checks of freed pages until a PCP drain
  cpuset: use static key better and convert to new API
  mm, page_alloc: inline pageblock lookup in page free fast paths
  mm, page_alloc: remove unnecessary variable from free_pcppages_bulk
  mm, page_alloc: pull out side effects from free_pages_check
  mm, page_alloc: un-inline the bad part of free_pages_check
  mm, page_alloc: check multiple page fields with a single branch
  mm, page_alloc: remove field from alloc_context
  mm, page_alloc: avoid looking up the first zone in a zonelist twice
  mm, page_alloc: shortcut watermark checks for order-0 pages
  mm, page_alloc: reduce cost of fair zone allocation policy retry
  mm, page_alloc: shorten the page allocator fast path
  mm, page_alloc: check once if a zone has isolated pageblocks
  mm, page_alloc: move __GFP_HARDWALL modifications out of the fastpath
  mm, page_alloc: simplify last cpupid reset
  mm, page_alloc: remove unnecessary initialisation from __alloc_pages_nodemask()
  ...
diff --git a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-savu b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-savu
index f1e02a9..99fda67 100644
--- a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-savu
+++ b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-savu
@@ -3,9 +3,10 @@
 Contact:	Stefan Achatz <erazor_de@users.sourceforge.net>
 Description:	The mouse can store 5 profiles which can be switched by the
 		press of a button. A profile is split into general settings and
-		button settings. buttons holds informations about button layout.
-		When written, this file lets one write the respective profile
-		buttons to the mouse. The data has to be 47 bytes long.
+		button settings. The buttons variable holds information about
+		button layout. When written, this file lets one write the
+		respective profile buttons to the mouse. The data has to be
+		47 bytes long.
 		The mouse will reject invalid data.
 		Which profile to write is determined by the profile number
 		contained in the data.
@@ -26,8 +27,8 @@
 Contact:	Stefan Achatz <erazor_de@users.sourceforge.net>
 Description:	The mouse can store 5 profiles which can be switched by the
 		press of a button. A profile is split into general settings and
-		button settings. profile holds informations like resolution, sensitivity
-		and light effects.
+		button settings. A profile holds information like resolution,
+		sensitivity and light effects.
 		When written, this file lets one write the respective profile
 		settings back to the mouse. The data has to be 43 bytes long.
 		The mouse will reject invalid data.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
index f893337..ec27c6c 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -4,7 +4,7 @@
 Description:
 		Provides access to the binary "24x7 catalog" provided by the
 		hypervisor on POWER7 and 8 systems. This catalog lists events
-		avaliable from the powerpc "hv_24x7" pmu. Its format is
+		available from the powerpc "hv_24x7" pmu. Its format is
 		documented here:
 		https://raw.githubusercontent.com/jmesmon/catalog-24x7/master/hv-24x7-catalog.h
 
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-picolcd b/Documentation/ABI/testing/sysfs-driver-hid-picolcd
index 08579e7..98fd81a 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-picolcd
+++ b/Documentation/ABI/testing/sysfs-driver-hid-picolcd
@@ -39,5 +39,5 @@
 		Note: As device can barely do 2 complete refreshes a second
 		it only makes sense to adjust this value if only one or two
 		tiles get changed and it's not appropriate to expect the application
-		to flush it's tiny changes explicitely at higher than default rate.
+		to flush its tiny changes explicitly at higher than default rate.
 
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi
index b4436cca..c7fc72d 100644
--- a/Documentation/ABI/testing/sysfs-firmware-acpi
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -169,7 +169,7 @@
 		to enable/disable/clear ACPI interrupts in user space, which can be
 		used to debug some ACPI interrupt storm issues.
 
-		Note that only writting to VALID GPE/Fixed Event is allowed,
+		Note that only writing to VALID GPE/Fixed Event is allowed,
 		i.e. user can only change the status of runtime GPE and
 		Fixed Event with event handler installed.
 
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma b/Documentation/ABI/testing/sysfs-platform-hidma
new file mode 100644
index 0000000..d364415
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-hidma
@@ -0,0 +1,9 @@
+What:		/sys/devices/platform/hidma-*/chid
+		/sys/devices/platform/QCOM8061:*/chid
+Date:		Dec 2015
+KernelVersion:	4.4
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
+Description:
+		Contains the ID of the channel within the HIDMA instance.
+		It is used to associate a given HIDMA channel with the
+		priority and weight calls in the management interface.
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index 81efa88..e2e5484 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -2841,7 +2841,7 @@
 overall average bitrate for the stream and keeps it below or equal to the set bitrate. In the first case
 the average bitrate for the whole stream will be smaller then the set bitrate. This is caused because the
 average is calculated for smaller number of frames, on the other hand enabling this setting will ensure that
-the stream will meet tight bandwidth contraints. Applicable to encoders.
+the stream will meet tight bandwidth constraints. Applicable to encoders.
 </entry>
 	      </row>
 	      <row><entry></entry></row>
diff --git a/Documentation/DocBook/media/v4l/dev-raw-vbi.xml b/Documentation/DocBook/media/v4l/dev-raw-vbi.xml
index f4b61b6..78599bb 100644
--- a/Documentation/DocBook/media/v4l/dev-raw-vbi.xml
+++ b/Documentation/DocBook/media/v4l/dev-raw-vbi.xml
@@ -85,7 +85,7 @@
 results of <constant>VIDIOC_G_FMT</constant>, and call the
 &VIDIOC-S-FMT; ioctl with a pointer to this structure. Drivers return
 an &EINVAL; only when the given parameters are ambiguous, otherwise
-they modify the parameters according to the hardware capabilites and
+they modify the parameters according to the hardware capabilities and
 return the actual parameters. When the driver allocates resources at
 this point, it may return an &EBUSY; to indicate the returned
 parameters are valid but the required resources are currently not
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-selection.xml b/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
index 9523bc5..997f4e9 100644
--- a/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
@@ -216,7 +216,7 @@
 	<term><errorcode>ERANGE</errorcode></term>
 	<listitem>
 	  <para>It is not possible to adjust &v4l2-rect; <structfield>
-r</structfield> rectangle to satisfy all contraints given in the
+r</structfield> rectangle to satisfy all constraints given in the
 <structfield>flags</structfield> argument.</para>
 	</listitem>
       </varlistentry>
diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt
index 8d990bd..82001a2 100644
--- a/Documentation/IRQ-domain.txt
+++ b/Documentation/IRQ-domain.txt
@@ -70,6 +70,7 @@
 
 ==== Linear ====
 irq_domain_add_linear()
+irq_domain_create_linear()
 
 The linear reverse map maintains a fixed size table indexed by the
 hwirq number.  When a hwirq is mapped, an irq_desc is allocated for
@@ -81,10 +82,16 @@
 allocated for in-use IRQs.  The disadvantage is that the table must be
 as large as the largest possible hwirq number.
 
+irq_domain_add_linear() and irq_domain_create_linear() are functionally
+equivalent, except that the first argument is different - the former
+accepts an Open Firmware specific 'struct device_node', while the latter
+accepts a more general abstraction 'struct fwnode_handle'.
+
 The majority of drivers should use the linear map.
 
 ==== Tree ====
 irq_domain_add_tree()
+irq_domain_create_tree()
 
 The irq_domain maintains a radix tree map from hwirq numbers to Linux
 IRQs.  When an hwirq is mapped, an irq_desc is allocated and the
@@ -95,6 +102,11 @@
 hwirq number.  The disadvantage is that hwirq to IRQ number lookup is
 dependent on how many entries are in the table.
 
+irq_domain_add_tree() and irq_domain_create_tree() are functionally
+equivalent, except that the first argument is different - the former
+accepts an Open Firmware specific 'struct device_node', while the latter
+accepts a more general abstraction 'struct fwnode_handle'.
+
 Very few drivers should need this mapping.
 
 ==== No Map ===-
diff --git a/Documentation/Makefile b/Documentation/Makefile
index f3b04d2..de955e1 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -1,3 +1,3 @@
-subdir-y := accounting auxdisplay blackfin connector \
+subdir-y := accounting auxdisplay blackfin \
 	filesystems filesystems ia64 laptops mic misc-devices \
 	networking pcmcia prctl ptp timers vDSO watchdog
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 370ca00..9bccf16 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -176,13 +176,13 @@
 which Mathieu Desnoyers is now maintaining [MathieuDesnoyers2009URCU]
 [MathieuDesnoyersPhD].  TINY_RCU [PaulEMcKenney2009BloatWatchRCU] made
 its appearance, as did expedited RCU [PaulEMcKenney2009expeditedRCU].
-The problem of resizeable RCU-protected hash tables may now be on a path
+The problem of resizable RCU-protected hash tables may now be on a path
 to a solution [JoshTriplett2009RPHash].  A few academic researchers are now
 using RCU to solve their parallel problems [HariKannan2009DynamicAnalysisRCU].
 
 2010 produced a simpler preemptible-RCU implementation
 based on TREE_RCU [PaulEMcKenney2010SimpleOptRCU], lockdep-RCU
-[PaulEMcKenney2010LockdepRCU], another resizeable RCU-protected hash
+[PaulEMcKenney2010LockdepRCU], another resizable RCU-protected hash
 table [HerbertXu2010RCUResizeHash] (this one consuming more memory,
 but allowing arbitrary changes in hash function, as required for DoS
 avoidance in the networking code), realization of the 2009 RCU-protected
@@ -193,7 +193,7 @@
 [LinusTorvalds2011Linux2:6:38:rc1:NPigginVFS], an RCU-protected red-black
 tree using software transactional memory to protect concurrent updates
 (strange, but true!) [PhilHoward2011RCUTMRBTree], yet another variant of
-RCU-protected resizeable hash tables [Triplett:2011:RPHash], the 3.0 RCU
+RCU-protected resizable hash tables [Triplett:2011:RPHash], the 3.0 RCU
 trainwreck [PaulEMcKenney2011RCU3.0trainwreck], and Neil Brown's "Meet the
 Lockers" LWN article [NeilBrown2011MeetTheLockers].  Some academic
 work looked at debugging uses of RCU [Seyster:2011:RFA:2075416.2075425].
diff --git a/Documentation/adding-syscalls.txt b/Documentation/adding-syscalls.txt
index cc2d4ac..bbb31e0 100644
--- a/Documentation/adding-syscalls.txt
+++ b/Documentation/adding-syscalls.txt
@@ -136,7 +136,7 @@
  - xyzzyat(fd, "", ..., AT_EMPTY_PATH) is equivalent to fxyzzy(fd, ...)
 
 (For more details on the rationale of the *at() calls, see the openat(2) man
-page; for an example of AT_EMPTY_PATH, see the statat(2) man page.)
+page; for an example of AT_EMPTY_PATH, see the fstatat(2) man page.)
 
 If your new xyzzy(2) system call involves a parameter describing an offset
 within a file, make its type loff_t so that 64-bit offsets can be supported
diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet
index 08b885d..e08a673 100644
--- a/Documentation/arm/SA1100/Assabet
+++ b/Documentation/arm/SA1100/Assabet
@@ -214,7 +214,7 @@
 -----------------
 
 All the commands above aren't so useful if they have to be typed in every
-time the Assabet is rebooted.  Therefore it's possible to automatize the boot
+time the Assabet is rebooted.  Therefore it's possible to automate the boot
 process using RedBoot's scripting capability.
 
 For example, I use this to boot Linux with both the kernel and the ramdisk
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index ba4b6ac..c6938e5 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -53,7 +53,9 @@
 | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
 | ARM            | Cortex-A57      | #852523         | N/A                     |
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+| ARM            | MMU-500         | #841119,#826419 | N/A                     |
 |                |                 |                 |                         |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154    |
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456    |
+| Cavium         | ThunderX SMMUv2 | #27704          | N/A                     |
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
index e840b47..e55103a 100644
--- a/Documentation/block/00-INDEX
+++ b/Documentation/block/00-INDEX
@@ -2,6 +2,8 @@
 	- This file
 biodoc.txt
 	- Notes on the Generic Block Layer Rewrite in Linux 2.5
+biovecs.txt
+	- Immutable biovecs and biovec iterators
 capability.txt
 	- Generic Block Device Capability (/sys/block/<device>/capability)
 cfq-iosched.txt
@@ -14,6 +16,8 @@
 	- Deadline IO scheduler tunables
 ioprio.txt
 	- Block io priorities (in CFQ scheduler)
+pr.txt
+	- Block layer support for Persistent Reservations
 null_blk.txt
 	- Null block for block-layer benchmarking.
 queue-sysfs.txt
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index ff71e16..b14abf2 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -280,17 +280,9 @@
 different than user memory, since it can't be swapped out, which makes it
 possible to DoS the system by consuming too much of this precious resource.
 
-Kernel memory won't be accounted at all until limit on a group is set. This
-allows for existing setups to continue working without disruption.  The limit
-cannot be set if the cgroup have children, or if there are already tasks in the
-cgroup. Attempting to set the limit under those conditions will return -EBUSY.
-When use_hierarchy == 1 and a group is accounted, its children will
-automatically be accounted regardless of their limit value.
-
-After a group is first limited, it will be kept being accounted until it
-is removed. The memory limitation itself, can of course be removed by writing
--1 to memory.kmem.limit_in_bytes. In this case, kmem will be accounted, but not
-limited.
+Kernel memory accounting is enabled for all memory cgroups by default. But
+it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel
+at boot time. In this case, kernel memory will not be accounted at all.
 
 Kernel memory limits are not imposed for the root cgroup. Usage for the root
 cgroup may or may not be accounted. The memory used is accumulated into
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
index f6215f9..ab7ca89 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/connector/connector.txt
@@ -186,3 +186,11 @@
 Some work in netlink area is still being done, so things can be changed in
 2.6.15 timeframe, if it will happen, documentation will be updated for that
 kernel.
+
+/*****************************************/
+Code samples
+/*****************************************/
+
+Sample code for a connector test module and user space can be found
+in samples/connector/. To build this code, enable CONFIG_CONNECTOR
+and CONFIG_SAMPLES.
diff --git a/Documentation/devicetree/bindings/arc/eznps.txt b/Documentation/devicetree/bindings/arc/eznps.txt
new file mode 100644
index 0000000..1aa50c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/eznps.txt
@@ -0,0 +1,7 @@
+EZchip NPS Network Processor Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+Appliance main board with NPS400 ASIC.
+
+Required root node properties:
+    - compatible = "ezchip,arc-nps";
diff --git a/Documentation/devicetree/bindings/clock/microchip,pic32.txt b/Documentation/devicetree/bindings/clock/microchip,pic32.txt
new file mode 100644
index 0000000..c93d88f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,pic32.txt
@@ -0,0 +1,39 @@
+Microchip PIC32 Clock Controller Binding
+----------------------------------------
+Microchip clock controller consists of a few oscillators, PLL, multiplexer
+and a few divider modules.
+
+This binding uses common clock bindings.
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible: shall be "microchip,pic32mzda-clk".
+- reg: shall contain base address and length of clock registers.
+- #clock-cells: shall be 1.
+
+Optional properties:
+- microchip,pic32mzda-sosc: shall be added only if platform has
+  secondary oscillator connected.
+
+Example:
+	rootclk: clock-controller@1f801200 {
+		compatible = "microchip,pic32mzda-clk";
+		reg = <0x1f801200 0x200>;
+		#clock-cells = <1>;
+		/* optional */
+		microchip,pic32mzda-sosc;
+	};
+
+
+The clock consumer shall specify the desired clock-output of the clock
+controller (as defined in [2]) by specifying output-id in its "clock"
+phandle cell.
+[2] include/dt-bindings/clock/microchip,pic32-clock.h
+
+For example for UART2:
+uart2: serial@2 {
+	compatible = "microchip,pic32mzda-uart";
+	reg = <>;
+	interrupts = <>;
+	clocks = <&rootclk PB2CLK>;
+};
diff --git a/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
index 1396078..baf9b34 100644
--- a/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
+++ b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
@@ -12,6 +12,10 @@
 - reg: Should contain DMA registers location and length.
 - interrupts: Should contain the DMA interrupts associated
 		to the DMA channels in ascending order.
+- interrupt-names: Should contain the names of the interrupts
+		   in the form "dmaXX".
+		   Use "dma-shared-all" for the common interrupt line
+		   that is shared by all dma channels.
 - #dma-cells: Must be <1>, the cell in the dmas property of the
 		client device represents the DREQ number.
 - brcm,dma-channel-mask: Bit mask representing the channels
@@ -34,13 +38,35 @@
 		     <1 24>,
 		     <1 25>,
 		     <1 26>,
+		     /* dma channel 11-14 share one irq */
 		     <1 27>,
+		     <1 27>,
+		     <1 27>,
+		     <1 27>,
+		     /* unused shared irq for all channels */
 		     <1 28>;
+	interrupt-names = "dma0",
+			  "dma1",
+			  "dma2",
+			  "dma3",
+			  "dma4",
+			  "dma5",
+			  "dma6",
+			  "dma7",
+			  "dma8",
+			  "dma9",
+			  "dma10",
+			  "dma11",
+			  "dma12",
+			  "dma13",
+			  "dma14",
+			  "dma-shared-all";
 
 	#dma-cells = <1>;
 	brcm,dma-channel-mask = <0x7f35>;
 };
 
+
 DMA clients connected to the BCM2835 DMA controller must use the format
 described in the dma.txt file, using a two-cell specifier for each channel.
 
diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt
index 276ef81..c075f59 100644
--- a/Documentation/devicetree/bindings/dma/mv-xor.txt
+++ b/Documentation/devicetree/bindings/dma/mv-xor.txt
@@ -1,7 +1,10 @@
 * Marvell XOR engines
 
 Required properties:
-- compatible: Should be "marvell,orion-xor" or "marvell,armada-380-xor"
+- compatible: Should be one of the following:
+  - "marvell,orion-xor"
+  - "marvell,armada-380-xor"
+  - "marvell,armada-3700-xor".
 - reg: Should contain registers location and length (two sets)
     the first set is the low registers, the second set the high
     registers for the XOR engine.
diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt
new file mode 100644
index 0000000..1e1dc8f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt
@@ -0,0 +1,55 @@
+* NVIDIA Tegra Audio DMA (ADMA) controller
+
+The Tegra Audio DMA controller is used for transferring data
+between system memory and the Audio Processing Engine (APE).
+
+Required properties:
+- compatible: Must be "nvidia,tegra210-adma".
+- reg: Should contain DMA registers location and length. This should be
+  a single entry that includes all of the per-channel registers in one
+  contiguous bank.
+- interrupt-parent: Phandle to the interrupt parent controller.
+- interrupts: Should contain all of the per-channel DMA interrupts in
+  ascending order with respect to the DMA channel index.
+- clocks: Must contain one entry for the ADMA module clock
+  (TEGRA210_CLK_D_AUDIO).
+- clock-names: Must contain the name "d_audio" for the corresponding
+  'clocks' entry.
+- #dma-cells : Must be 1. The first cell denotes the receive/transmit
+  request number and should be between 1 and the maximum number of
+  requests supported. This value corresponds to the RX/TX_REQUEST_SELECT
+  fields in the ADMA_CHn_CTRL register.
+
+
+Example:
+
+adma: dma@702e2000 {
+	compatible = "nvidia,tegra210-adma";
+	reg = <0x0 0x702e2000 0x0 0x2000>;
+	interrupt-parent = <&tegra_agic>;
+	interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&tegra_car TEGRA210_CLK_D_AUDIO>;
+	clock-names = "d_audio";
+	#dma-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
index 1c9d48e..9cbf5d9 100644
--- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -13,6 +13,8 @@
 - clock-names: must contain "bam_clk" entry
 - qcom,ee : indicates the active Execution Environment identifier (0-7) used in
   the secure world.
+- qcom,controlled-remotely : optional, indicates that the bam is controlled by
+  remote processor i.e. execution environment.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index c261598..0f55832 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -13,6 +13,11 @@
 - chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1:
   increase from chan n->0
 - block_size: Maximum block size supported by the controller
+- data-width: Maximum data width supported by hardware per AHB master
+  (in bytes, power of 2)
+
+
+Deprecated properties:
 - data_width: Maximum data width supported by hardware per AHB master
   (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
 
@@ -38,7 +43,7 @@
 		chan_allocation_order = <1>;
 		chan_priority = <1>;
 		block_size = <0xfff>;
-		data_width = <3 3>;
+		data-width = <8 8>;
 	};
 
 DMA clients connected to the Designware DMA controller must use the format
@@ -47,8 +52,8 @@
 
 1. A phandle pointing to the DMA controller
 2. The DMA request line number
-3. Source master for transfers on allocated channel
-4. Destination master for transfers on allocated channel
+3. Memory master for transfers on allocated channel
+4. Peripheral master for transfers on allocated channel
 
 Example:
 	
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
index e4c4d47..a1f2683 100644
--- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
@@ -3,18 +3,44 @@
 as two channels, one is to transmit to the video device and another is
 to receive from the video device.
 
+Xilinx AXI DMA engine, it does transfers between memory and AXI4 stream
+target devices. It can be configured to have one channel or two channels.
+If configured as two channels, one is to transmit to the device and another
+is to receive from the device.
+
+Xilinx AXI CDMA engine, it does transfers between memory-mapped source
+address and a memory-mapped destination address.
+
 Required properties:
-- compatible: Should be "xlnx,axi-vdma-1.00.a"
+- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a" or
+	      "xlnx,axi-cdma-1.00.a"
 - #dma-cells: Should be <1>, see "dmas" property below
 - reg: Should contain VDMA registers location and length.
-- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
+- xlnx,addrwidth: Should be the vdma addressing size in bits (ex: 32 bits).
+- dma-ranges: Should be as the following <dma_addr cpu_addr max_len>.
 - dma-channel child node: Should have at least one channel and can have up to
 	two channels per device. This node specifies the properties of each
 	DMA channel (see child node properties below).
+- clocks: Input clock specifier. Refer to common clock bindings.
+- clock-names: List of input clocks
+	For VDMA:
+	Required elements: "s_axi_lite_aclk"
+	Optional elements: "m_axi_mm2s_aclk", "m_axi_s2mm_aclk",
+			   "m_axis_mm2s_aclk", "s_axis_s2mm_aclk"
+	For CDMA:
+	Required elements: "s_axi_lite_aclk", "m_axi_aclk"
+	For AXIDMA:
+	Required elements: "s_axi_lite_aclk"
+	Optional elements: "m_axi_mm2s_aclk", "m_axi_s2mm_aclk",
+			   "m_axi_sg_aclk"
+
+Required properties for VDMA:
+- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
 
 Optional properties:
 - xlnx,include-sg: Tells configured for Scatter-mode in
 	the hardware.
+Optional properties for VDMA:
 - xlnx,flush-fsync: Tells which channel to Flush on Frame sync.
 	It takes following values:
 	{1}, flush both channels
@@ -31,6 +57,7 @@
 Optional child node properties:
 - xlnx,include-dre: Tells hardware is configured for Data
 	Realignment Engine.
+Optional child node properties for VDMA:
 - xlnx,genlock-mode: Tells Genlock synchronization is
 	enabled/disabled in hardware.
 
@@ -41,8 +68,13 @@
 	compatible = "xlnx,axi-vdma-1.00.a";
 	#dma_cells = <1>;
 	reg = < 0x40030000 0x10000 >;
+	dma-ranges = <0x00000000 0x00000000 0x40000000>;
 	xlnx,num-fstores = <0x8>;
 	xlnx,flush-fsync = <0x1>;
+	xlnx,addrwidth = <0x20>;
+	clocks = <&clk 0>, <&clk 1>, <&clk 2>, <&clk 3>, <&clk 4>;
+	clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk", "m_axi_s2mm_aclk",
+		      "m_axis_mm2s_aclk", "s_axis_s2mm_aclk";
 	dma-channel@40030000 {
 		compatible = "xlnx,axi-vdma-mm2s-channel";
 		interrupts = < 0 54 4 >;
diff --git a/Documentation/devicetree/bindings/i2c/i2c-octeon.txt b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
index dced82e..872d485 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
@@ -4,6 +4,12 @@
 
   Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
 
+  or
+
+  compatible: "cavium,octeon-7890-twsi"
+
+  Compatibility with cn78XX SOCs.
+
 - reg: The base address of the TWSI/I2C bus controller register bank.
 
 - #address-cells: Must be <1>.
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
index cf8bfc9..5f0cb50 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
@@ -19,6 +19,9 @@
 - clock-frequency: desired I2C bus clock frequency in Hz. The absence of this
   property indicates the default frequency 100 kHz.
 - clocks: clock specifier.
+- dmas: Must contain a list of two references to DMA specifiers, one for
+  transmission, and one for reception.
+- dma-names: Must contain a list of two DMA names, "tx" and "rx".
 
 - i2c-scl-falling-time-ns: see i2c.txt
 - i2c-scl-internal-delay-ns: see i2c.txt
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt
new file mode 100644
index 0000000..4040905
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt
@@ -0,0 +1,57 @@
+Broadcom BCM6345-style Level 1 interrupt controller
+
+This block is a first level interrupt controller that is typically connected
+directly to one of the HW INT lines on each CPU.
+
+Key elements of the hardware design include:
+
+- 32, 64 or 128 incoming level IRQ lines
+
+- Most on-chip peripherals are wired directly to an L1 input
+
+- A separate instance of the register set for each CPU, allowing individual
+  peripheral IRQs to be routed to any CPU
+
+- Contains one or more enable/status word pairs per CPU
+
+- No atomic set/clear operations
+
+- No polarity/level/edge settings
+
+- No FIFO or priority encoder logic; software is expected to read all
+  2-4 status words to determine which IRQs are pending
+
+Required properties:
+
+- compatible: should be "brcm,bcm<soc>-l1-intc", "brcm,bcm6345-l1-intc"
+- reg: specifies the base physical address and size of the registers;
+  the number of supported IRQs is inferred from the size argument
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+  source, should be 1.
+- interrupt-parent: specifies the phandle to the parent interrupt controller(s)
+  this one is cascaded from
+- interrupts: specifies the interrupt line(s) in the interrupt-parent controller
+  node; valid values depend on the type of parent interrupt controller
+
+If multiple reg ranges and interrupt-parent entries are present on an SMP
+system, the driver will allow IRQ SMP affinity to be set up through the
+/proc/irq/ interface.  In the simplest possible configuration, only one
+reg range and one interrupt-parent is needed.
+
+The driver operates in native CPU endianness by default; there is no support for
+specifying an alternative endianness.
+
+Example:
+
+periph_intc: interrupt-controller@10000000 {
+        compatible = "brcm,bcm63168-l1-intc", "brcm,bcm6345-l1-intc";
+        reg = <0x10000020 0x20>,
+              <0x10000040 0x20>;
+
+        interrupt-controller;
+        #interrupt-cells = <1>;
+
+        interrupt-parent = <&cpu_intc>;
+        interrupts = <2>, <3>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt
new file mode 100644
index 0000000..888b2b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt
@@ -0,0 +1,17 @@
+EZchip NPS Interrupt Controller
+
+Required properties:
+
+- compatible : should be "ezchip,nps400-ic"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The value shall be 1.
+
+
+Example:
+
+intc: interrupt-controller {
+	compatible = "ezchip,nps400-ic";
+	interrupt-controller;
+	#interrupt-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 7180745..19fe6f2 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -16,6 +16,7 @@
                         "arm,mmu-400"
                         "arm,mmu-401"
                         "arm,mmu-500"
+                        "cavium,smmu-v2"
 
                   depending on the particular implementation and/or the
                   version of the architecture implemented.
diff --git a/Documentation/devicetree/bindings/mfd/arizona.txt b/Documentation/devicetree/bindings/mfd/arizona.txt
index 9b30011..a6e2ea41 100644
--- a/Documentation/devicetree/bindings/mfd/arizona.txt
+++ b/Documentation/devicetree/bindings/mfd/arizona.txt
@@ -1,6 +1,6 @@
 Cirrus Logic/Wolfson Microelectronics Arizona class audio SoCs
 
-These devices are audio SoCs with extensive digital capabilites and a range
+These devices are audio SoCs with extensive digital capabilities and a range
 of analogue I/O.
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/mips/brcm/soc.txt b/Documentation/devicetree/bindings/mips/brcm/soc.txt
index 7bab90c..4a7e030 100644
--- a/Documentation/devicetree/bindings/mips/brcm/soc.txt
+++ b/Documentation/devicetree/bindings/mips/brcm/soc.txt
@@ -4,7 +4,8 @@
 
 - compatible: "brcm,bcm3384", "brcm,bcm33843"
               "brcm,bcm3384-viper", "brcm,bcm33843-viper"
-              "brcm,bcm6328", "brcm,bcm6368",
+              "brcm,bcm6328", "brcm,bcm6358", "brcm,bcm6368",
+              "brcm,bcm63168", "brcm,bcm63268",
               "brcm,bcm7125", "brcm,bcm7346", "brcm,bcm7358", "brcm,bcm7360",
               "brcm,bcm7362", "brcm,bcm7420", "brcm,bcm7425"
 
diff --git a/Documentation/devicetree/bindings/mips/cavium/ciu3.txt b/Documentation/devicetree/bindings/mips/cavium/ciu3.txt
new file mode 100644
index 0000000..616862a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/ciu3.txt
@@ -0,0 +1,27 @@
+* Central Interrupt Unit v3
+
+Properties:
+- compatible: "cavium,octeon-7890-ciu3"
+
+  Compatibility with 78XX and 73XX SOCs.
+
+- interrupt-controller:  This is an interrupt controller.
+
+- reg: The base address of the CIU's register bank.
+
+- #interrupt-cells: Must be <2>.  The first cell is source number.
+  The second cell indicates the triggering semantics, and may have a
+  value of either 4 for level semantics, or 1 for edge semantics.
+
+Example:
+	interrupt-controller@1010000000000 {
+		compatible = "cavium,octeon-7890-ciu3";
+		interrupt-controller;
+		/* Interrupts are specified by two parts:
+		 * 1) Source number (20 significant bits)
+		 * 2) Trigger type: (4 == level, 1 == edge)
+		 */
+		#address-cells = <0>;
+		#interrupt-cells = <2>;
+		reg = <0x10100 0x00000000 0x0 0xb0000000>;
+	};
diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
index 3be80c6..83aeb1f 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
@@ -4,8 +4,8 @@
 and thus inherits all the common properties defined in designware-pcie.txt.
 
 Required properties:
-- compatible: "fsl,imx6q-pcie"
-- reg: base addresse and length of the pcie controller
+- compatible: "fsl,imx6q-pcie", "fsl,imx6sx-pcie", "fsl,imx6qp-pcie"
+- reg: base address and length of the PCIe controller
 - interrupts: A list of interrupt outputs of the controller. Must contain an
   entry for each entry in the interrupt-names property.
 - interrupt-names: Must include the following entries:
@@ -19,6 +19,20 @@
 - fsl,tx-deemph-gen2-6db: Gen2 (6db) De-emphasis value. Default: 20
 - fsl,tx-swing-full: Gen2 TX SWING FULL value. Default: 127
 - fsl,tx-swing-low: TX launch amplitude swing_low value. Default: 127
+- fsl,max-link-speed: Specify PCI gen for link capability. Must be '2' for
+  gen2, otherwise will default to gen1. Note that the IMX6 LVDS clock outputs
+  do not meet gen2 jitter requirements and thus for gen2 capability a gen2
+  compliant clock generator should be used and configured.
+- reset-gpio: Should specify the GPIO for controlling the PCI bus device reset
+  signal. It's not polarity aware and defaults to active-low reset sequence
+  (L=reset state, H=operation state).
+- reset-gpio-active-high: If present then the reset sequence using the GPIO
+  specified in the "reset-gpio" property is reversed (H=reset state,
+  L=operation state).
+
+Additional required properties for imx6sx-pcie:
+- clock-names: Must include the following additional entries:
+	- "pcie_inbound_axi"
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/pci/pci-armada8k.txt b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
new file mode 100644
index 0000000..598533a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
@@ -0,0 +1,38 @@
+* Marvell Armada 7K/8K PCIe interface
+
+This PCIe host controller is based on the Synopsys DesignWare PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: "marvell,armada8k-pcie"
+- reg: must contain two register regions
+   - the control register region
+   - the config space region
+- reg-names:
+   - "ctrl" for the control register region
+   - "config" for the config space region
+- interrupts: Interrupt specifier for the PCIe controller
+- clocks: reference to the PCIe controller clock
+
+Example:
+
+	pcie@f2600000 {
+		compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
+		reg = <0 0xf2600000 0 0x10000>, <0 0xf6f00000 0 0x80000>;
+		reg-names = "ctrl", "config";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		dma-coherent;
+
+		bus-range = <0 0xff>;
+		ranges = <0x81000000 0 0xf9000000 0  0xf9000000 0 0x10000	/* downstream I/O */
+			  0x82000000 0 0xf6000000 0  0xf6000000 0 0xf00000>;	/* non-prefetchable memory */
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+		num-lanes = <1>;
+		clocks = <&cpm_syscon0 1 13>;
+		status = "disabled";
+	};
diff --git a/Documentation/devicetree/bindings/pci/pci-keystone.txt b/Documentation/devicetree/bindings/pci/pci-keystone.txt
index 54eae29..d08a4d5 100644
--- a/Documentation/devicetree/bindings/pci/pci-keystone.txt
+++ b/Documentation/devicetree/bindings/pci/pci-keystone.txt
@@ -56,6 +56,7 @@
 	phy-names: name of the Generic Keystine SerDes phy for PCI
 	  - If boot loader already does PCI link establishment, then phys and
 	    phy-names shouldn't be present.
+	interrupts: platform interrupt for error interrupts.
 
 Designware DT Properties not applicable for Keystone PCI
 
diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
index ffadb7a..74e6ec0 100644
--- a/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
@@ -72,8 +72,8 @@
 
 The pin configuration parameters use the generic pinconf bindings defined in
 pinctrl-bindings.txt in this directory. The supported parameters are
-bias-disable, bias-pull-up, bias-pull-down and power-source. For pins that
-have a configurable I/O voltage, the power-source value should be the
+bias-disable, bias-pull-up, bias-pull-down, drive-strength and power-source. For
+pins that have a configurable I/O voltage, the power-source value should be the
 nominal I/O voltage in millivolts.
 
 
diff --git a/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt b/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt
new file mode 100644
index 0000000..65b38bf
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt
@@ -0,0 +1,29 @@
+* Microchip Universal Asynchronous Receiver Transmitter (UART)
+
+Required properties:
+- compatible: Should be "microchip,pic32mzda-uart"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt
+- clocks: Phandle to the clock.
+          See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+- pinctrl-names: A pinctrl state named "default" must be defined.
+- pinctrl-0: Phandle referencing pin configuration of the UART peripheral.
+             See: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+Optional properties:
+- cts-gpios: CTS pin for UART
+
+Example:
+	uart1: serial@1f822000 {
+		compatible = "microchip,pic32mzda-uart";
+		reg = <0x1f822000 0x50>;
+		interrupts = <112 IRQ_TYPE_LEVEL_HIGH>,
+			<113 IRQ_TYPE_LEVEL_HIGH>,
+			<114 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&PBCLK2>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_uart1
+				&pinctrl_uart1_cts
+				&pinctrl_uart1_rts>;
+		cts-gpios = <&gpio1 15 0>;
+	};
diff --git a/Documentation/devicetree/bindings/sound/davinci-mcbsp.txt b/Documentation/devicetree/bindings/sound/davinci-mcbsp.txt
new file mode 100644
index 0000000..55b53e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/davinci-mcbsp.txt
@@ -0,0 +1,51 @@
+Texas Instruments DaVinci McBSP module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This binding describes the "Multi-channel Buffered Serial Port" (McBSP)
+audio interface found in some TI DaVinci processors like the OMAP-L138 or AM180x.
+
+
+Required properties:
+~~~~~~~~~~~~~~~~~~~~
+- compatible :
+        "ti,da850-mcbsp" : for DA850, AM180x and OMAP-L138 platforms
+
+- reg : physical base address and length of the controller memory mapped
+        region(s).
+- reg-names : Should contain:
+        * "mpu" for the main registers (required).
+        * "dat" for the data FIFO (optional).
+
+- dmas: three element list of DMA controller phandles, DMA request line and
+	TC channel ordered triplets.
+- dma-names: identifier string for each DMA request line in the dmas property.
+	These strings correspond 1:1 with the ordered triplets in dmas. The dma
+	identifiers must be "rx" and "tx".
+
+Optional properties:
+~~~~~~~~~~~~~~~~~~~~
+- interrupts : Interrupt numbers for McBSP
+- interrupt-names : Known interrupt names are "rx" and "tx"
+
+- pinctrl-0: Should specify pin control group used for this controller.
+- pinctrl-names: Should contain only one value - "default", for more details
+        please refer to pinctrl-bindings.txt
+
+Example (AM1808):
+~~~~~~~~~~~~~~~~~
+
+mcbsp0: mcbsp@1d10000 {
+	compatible = "ti,da850-mcbsp";
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcbsp0_pins>;
+
+	reg = 	<0x00110000 0x1000>,
+		<0x00310000 0x1000>;
+	reg-names = "mpu", "dat";
+	interrupts = <97 98>;
+	interrupt-names = "rx", "tx";
+	dmas = <&edma0 3 1
+		&edma0 2 1>;
+	dma-names = "tx", "rx";
+	status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/sound/fsl-sai.txt b/Documentation/devicetree/bindings/sound/fsl-sai.txt
index 044e5d7..740b467 100644
--- a/Documentation/devicetree/bindings/sound/fsl-sai.txt
+++ b/Documentation/devicetree/bindings/sound/fsl-sai.txt
@@ -7,8 +7,8 @@
 
 Required properties:
 
-  - compatible		: Compatible list, contains "fsl,vf610-sai" or
-			  "fsl,imx6sx-sai".
+  - compatible		: Compatible list, contains "fsl,vf610-sai",
+			  "fsl,imx6sx-sai" or "fsl,imx6ul-sai"
 
   - reg			: Offset and length of the register set for the device.
 
@@ -48,6 +48,11 @@
 			  receive data by following their own bit clocks and
 			  frame sync clocks separately.
 
+Optional properties (for mx6ul):
+
+  - fsl,sai-mclk-direction-output: This is a boolean property. If present,
+			 indicates that SAI will output the SAI MCLK clock.
+
 Note:
 - If both fsl,sai-asynchronous and fsl,sai-synchronous-rx are absent, the
   default synchronous mode (sync Rx with Tx) will be used, which means both
diff --git a/Documentation/devicetree/bindings/sound/pcm5102a.txt b/Documentation/devicetree/bindings/sound/pcm5102a.txt
new file mode 100644
index 0000000..c63ab0b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/pcm5102a.txt
@@ -0,0 +1,13 @@
+PCM5102a audio CODECs
+
+These devices do not use I2C or SPI.
+
+Required properties:
+
+  - compatible : set as "ti,pcm5102a"
+
+Examples:
+
+	pcm5102a: pcm5102a {
+		compatible = "ti,pcm5102a";
+	};
diff --git a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt
new file mode 100644
index 0000000..c8c03d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt
@@ -0,0 +1,15 @@
+NPS Network Processor
+
+Required properties:
+
+- compatible :	should be "ezchip,nps400-timer"
+
+Clocks required for compatible = "ezchip,nps400-timer":
+- clocks : Must contain a single entry describing the clock input
+
+Example:
+
+timer {
+	compatible = "ezchip,nps400-timer";
+	clocks = <&sysclk>;
+};
diff --git a/Documentation/devicetree/bindings/timer/snps,arc-timer.txt b/Documentation/devicetree/bindings/timer/snps,arc-timer.txt
new file mode 100644
index 0000000..4ef0246
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,arc-timer.txt
@@ -0,0 +1,31 @@
+Synopsys ARC Local Timer with Interrupt Capabilities
+- Found on all ARC CPUs (ARC700/ARCHS)
+- Can be optionally programmed to interrupt on Limit
+- Two identical copies TIMER0 and TIMER1 exist in ARC cores and historically
+  TIMER0 used as clockevent provider (true for all ARC cores)
+  TIMER1 used for clocksource (mandatory for ARC700, optional for ARC HS)
+
+Required properties:
+
+- compatible : should be "snps,arc-timer"
+- interrupts : single Interrupt going into parent intc
+	       (16 for ARCHS cores, 3 for ARC700 cores)
+- clocks     : phandle to the source clock
+
+Optional properties:
+
+- interrupt-parent : phandle to parent intc
+
+Example:
+
+	timer0 {
+		compatible = "snps,arc-timer";
+		interrupts = <3>;
+		interrupt-parent = <&core_intc>;
+		clocks = <&core_clk>;
+	};
+
+	timer1 {
+		compatible = "snps,arc-timer";
+		clocks = <&core_clk>;
+	};
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt b/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt
new file mode 100644
index 0000000..b6cd1b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt
@@ -0,0 +1,14 @@
+Synopsys ARC Free Running 64-bit Global Timer for ARC HS CPUs
+- clocksource provider for SMP SoC
+
+Required properties:
+
+- compatible : should be "snps,archs-gfrc"
+- clocks     : phandle to the source clock
+
+Example:
+
+	gfrc {
+		compatible = "snps,archs-gfrc";
+		clocks = <&core_clk>;
+	};
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt b/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt
new file mode 100644
index 0000000..47bd7a7
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt
@@ -0,0 +1,14 @@
+Synopsys ARC Free Running 64-bit Local Timer for ARC HS CPUs
+- clocksource provider for UP SoC
+
+Required properties:
+
+- compatible : should be "snps,archs-rtc"
+- clocks     : phandle to the source clock
+
+Example:
+
+	rtc {
+		compatible = "snps,archs-rtc";
+		clocks = <&core_clk>;
+	};
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 3af48e8..316412d 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -72,6 +72,8 @@
 dlg	Dialog Semiconductor
 dlink	D-Link Corporation
 dmo	Data Modul AG
+dptechnics	DPTechnics
+dragino	Dragino Technology Co., Limited
 ea	Embedded Artists AB
 ebv	EBV Elektronik
 edt	Emerging Display Technologies
@@ -88,6 +90,7 @@
 everest	Everest Semiconductor Co. Ltd.
 everspin	Everspin Technologies, Inc.
 excito	Excito
+ezchip	EZchip Semiconductor
 fcs	Fairchild Semiconductor
 firefly	Firefly
 focaltech	FocalTech Systems Co.,Ltd
@@ -175,6 +178,7 @@
 nxp	NXP Semiconductors
 okaya	Okaya Electric America, Inc.
 olimex	OLIMEX Ltd.
+onion	Onion Corporation
 onnn	ON Semiconductor Corp.
 opencores	OpenCores.org
 option	Option NV
diff --git a/Documentation/devicetree/bindings/watchdog/microchip,pic32-dmt.txt b/Documentation/devicetree/bindings/watchdog/microchip,pic32-dmt.txt
new file mode 100644
index 0000000..852f694
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/microchip,pic32-dmt.txt
@@ -0,0 +1,19 @@
+* Microchip PIC32 Deadman Timer
+
+The deadman timer is used to reset the processor in the event of a software
+malfunction. It is a free-running instruction fetch timer, which is clocked
+whenever an instruction fetch occurs until a count match occurs.
+
+Required properties:
+- compatible: must be "microchip,pic32mzda-dmt".
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- clocks: phandle of parent clock (should be &PBCLK7).
+
+Example:
+
+	watchdog@1f800a00 {
+		compatible = "microchip,pic32mzda-dmt";
+		reg = <0x1f800a00 0x80>;
+		clocks = <&PBCLK7>;
+	};
diff --git a/Documentation/devicetree/bindings/watchdog/microchip,pic32-wdt.txt b/Documentation/devicetree/bindings/watchdog/microchip,pic32-wdt.txt
new file mode 100644
index 0000000..d140103
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/microchip,pic32-wdt.txt
@@ -0,0 +1,18 @@
+* Microchip PIC32 Watchdog Timer
+
+When enabled, the watchdog peripheral can be used to reset the device if the
+WDT is not cleared periodically in software.
+
+Required properties:
+- compatible: must be "microchip,pic32mzda-wdt".
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- clocks: phandle of source clk. should be <&LPRC> clk.
+
+Example:
+
+	watchdog@1f800800 {
+		compatible = "microchip,pic32mzda-wdt";
+		reg = <0x1f800800 0x200>;
+		clocks = <&LPRC>;
+	};
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 73b98df..7281fb4 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -268,6 +268,9 @@
   devm_iio_trigger_alloc()
   devm_iio_trigger_free()
 
+INPUT
+  devm_input_allocate_device()
+
 IO region
   devm_release_mem_region()
   devm_release_region()
@@ -328,6 +331,8 @@
 PINCTRL
   devm_pinctrl_get()
   devm_pinctrl_put()
+  devm_pinctrl_register()
+  devm_pinctrl_unregister()
 
 PWM
   devm_pwm_get()
diff --git a/Documentation/filesystems/cifs/README b/Documentation/filesystems/cifs/README
index 2d5622f..a5478840 100644
--- a/Documentation/filesystems/cifs/README
+++ b/Documentation/filesystems/cifs/README
@@ -272,7 +272,7 @@
 		same domain (e.g. running winbind or nss_ldap) and
 		the server supports the Unix Extensions then the uid
 		and gid can be retrieved from the server (and uid
-		and gid would not have to be specifed on the mount. 
+		and gid would not have to be specified on the mount.
 		For servers which do not support the CIFS Unix
 		extensions, the default uid (and gid) returned on lookup
 		of existing files will be the uid (gid) of the person
diff --git a/Documentation/filesystems/pohmelfs/design_notes.txt b/Documentation/filesystems/pohmelfs/design_notes.txt
index 8aef9133..106d17f 100644
--- a/Documentation/filesystems/pohmelfs/design_notes.txt
+++ b/Documentation/filesystems/pohmelfs/design_notes.txt
@@ -29,7 +29,7 @@
  * Read request (data read, directory listing, lookup requests) balancing between multiple servers.
  * Write requests are replicated to multiple servers and completed only when all of them are acked.
  * Ability to add and/or remove servers from the working set at run-time.
- * Strong authentification and possible data encryption in network channel.
+ * Strong authentication and possible data encryption in network channel.
  * Extended attributes support.
 
 POHMELFS is based on transactions, which are potentially long-standing objects that live
diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.txt
index 4086797..4f3d6a8 100644
--- a/Documentation/filesystems/qnx6.txt
+++ b/Documentation/filesystems/qnx6.txt
@@ -16,7 +16,7 @@
 concepts of blocks, inodes and directories.
 On QNX it is possible to create little endian and big endian qnx6 filesystems.
 This feature makes it possible to create and use a different endianness fs
-for the target (QNX is used on quite a range of embedded systems) plattform
+for the target (QNX is used on quite a range of embedded systems) platform
 running on a different endianness.
 The Linux driver handles endianness transparently. (LE and BE)
 
diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README
index 71f8685..cafdca8 100644
--- a/Documentation/firmware_class/README
+++ b/Documentation/firmware_class/README
@@ -20,7 +20,7 @@
 
  1), kernel(driver):
 	- calls request_firmware(&fw_entry, $FIRMWARE, device)
-	- kernel searchs the fimware image with name $FIRMWARE directly
+	- kernel searches the firmware image with name $FIRMWARE directly
 	in the below search path of root filesystem:
 		User customized search path by module parameter 'path'[1]
 		"/lib/firmware/updates/" UTS_RELEASE,
diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru
index 915f320..f1d4fe4 100644
--- a/Documentation/hwmon/abituguru
+++ b/Documentation/hwmon/abituguru
@@ -25,7 +25,7 @@
 	1) For revisions 2 and 3 uGuru's the driver can autodetect the
 	   sensortype (Volt or Temp) for bank1 sensors, for revision 1 uGuru's
 	   this doesnot always work. For these uGuru's the autodection can
-	   be overriden with the bank1_types module param. For all 3 known
+	   be overridden with the bank1_types module param. For all 3 known
 	   revison 1 motherboards the correct use of this param is:
 	   bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
 	   You may also need to specify the fan_sensors option for these boards
diff --git a/Documentation/i2c/i2c-topology b/Documentation/i2c/i2c-topology
new file mode 100644
index 0000000..e0aefee
--- /dev/null
+++ b/Documentation/i2c/i2c-topology
@@ -0,0 +1,370 @@
+I2C topology
+============
+
+There are a couple of reasons for building more complex i2c topologies
+than a straight-forward i2c bus with one adapter and one or more devices.
+
+1. A mux may be needed on the bus to prevent address collisions.
+
+2. The bus may be accessible from some external bus master, and arbitration
+   may be needed to determine if it is ok to access the bus.
+
+3. A device (particularly RF tuners) may want to avoid the digital noise
+   from the i2c bus, at least most of the time, and sits behind a gate
+   that has to be operated before the device can be accessed.
+
+Etc
+
+These constructs are represented as i2c adapter trees by Linux, where
+each adapter has a parent adapter (except the root adapter) and zero or
+more child adapters. The root adapter is the actual adapter that issues
+i2c transfers, and all adapters with a parent are part of an "i2c-mux"
+object (quoted, since it can also be an arbitrator or a gate).
+
+Depending on the particular mux driver, something happens when there is
+an i2c transfer on one of its child adapters. The mux driver can
+obviously operate a mux, but it can also do arbitration with an external
+bus master or open a gate. The mux driver has two operations for this,
+select and deselect. select is called before the transfer and (the
+optional) deselect is called after the transfer.
+
+
+Locking
+=======
+
+There are two variants of locking available to i2c muxes, they can be
+mux-locked or parent-locked muxes. As is evident from below, it can be
+useful to know if a mux is mux-locked or if it is parent-locked. The
+following list was correct at the time of writing:
+
+In drivers/i2c/muxes/
+i2c-arb-gpio-challenge    Parent-locked
+i2c-mux-gpio              Normally parent-locked, mux-locked iff
+                          all involved gpio pins are controlled by the
+                          same i2c root adapter that they mux.
+i2c-mux-pca9541           Parent-locked
+i2c-mux-pca954x           Parent-locked
+i2c-mux-pinctrl           Normally parent-locked, mux-locked iff
+                          all involved pinctrl devices are controlled
+                          by the same i2c root adapter that they mux.
+i2c-mux-reg               Parent-locked
+
+In drivers/iio/
+imu/inv_mpu6050/          Mux-locked
+
+In drivers/media/
+dvb-frontends/m88ds3103   Parent-locked
+dvb-frontends/rtl2830     Parent-locked
+dvb-frontends/rtl2832     Mux-locked
+dvb-frontends/si2168      Mux-locked
+usb/cx231xx/              Parent-locked
+
+
+Mux-locked muxes
+----------------
+
+Mux-locked muxes do not lock the entire parent adapter during the
+full select-transfer-deselect transaction, only the muxes on the parent
+adapter are locked. Mux-locked muxes are mostly interesting if the
+select and/or deselect operations must use i2c transfers to complete
+their tasks. Since the parent adapter is not fully locked during the
+full transaction, unrelated i2c transfers may interleave the different
+stages of the transaction. This has the benefit that the mux driver
+may be easier and cleaner to implement, but it has some caveats.
+
+ML1. If you build a topology with a mux-locked mux being the parent
+     of a parent-locked mux, this might break the expectation from the
+     parent-locked mux that the root adapter is locked during the
+     transaction.
+
+ML2. It is not safe to build arbitrary topologies with two (or more)
+     mux-locked muxes that are not siblings, when there are address
+     collisions between the devices on the child adapters of these
+     non-sibling muxes.
+
+     I.e. the select-transfer-deselect transaction targeting e.g. device
+     address 0x42 behind mux-one may be interleaved with a similar
+     operation targeting device address 0x42 behind mux-two. The
+     intention with such a topology would in this hypothetical example
+     be that mux-one and mux-two should not be selected simultaneously,
+     but mux-locked muxes do not guarantee that in all topologies.
+
+ML3. A mux-locked mux cannot be used by a driver for auto-closing
+     gates/muxes, i.e. something that closes automatically after a given
+     number (one, in most cases) of i2c transfers. Unrelated i2c transfers
+     may creep in and close prematurely.
+
+ML4. If any non-i2c operation in the mux driver changes the i2c mux state,
+     the driver has to lock the root adapter during that operation.
+     Otherwise garbage may appear on the bus as seen from devices
+     behind the mux, when an unrelated i2c transfer is in flight during
+     the non-i2c mux-changing operation.
+
+
+Mux-locked Example
+------------------
+
+                   .----------.     .--------.
+    .--------.     |   mux-   |-----| dev D1 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  .--------.
+                |  '----------'  '--| dev D2 |
+                |  .--------.       '--------'
+                '--| dev D3 |
+                   '--------'
+
+When there is an access to D1, this happens:
+
+ 1. Someone issues an i2c-transfer to D1.
+ 2. M1 locks muxes on its parent (the root adapter in this case).
+ 3. M1 calls ->select to ready the mux.
+ 4. M1 (presumably) does some i2c-transfers as part of its select.
+    These transfers are normal i2c-transfers that lock the parent
+    adapter.
+ 5. M1 feeds the i2c-transfer from step 1 to its parent adapter as a
+    normal i2c-transfer that locks the parent adapter.
+ 6. M1 calls ->deselect, if it has one.
+ 7. Same rules as in step 4, but for ->deselect.
+ 8. M1 unlocks muxes on its parent.
+
+This means that accesses to D2 are locked out for the full duration
+of the entire operation. But accesses to D3 are possibly interleaved
+at any point.
+
+
+Parent-locked muxes
+-------------------
+
+Parent-locked muxes lock the parent adapter during the full select-
+transfer-deselect transaction. The implication is that the mux driver
+has to ensure that any and all i2c transfers through that parent
+adapter during the transaction are unlocked i2c transfers (using e.g.
+__i2c_transfer), or a deadlock will follow. There are a couple of
+caveats.
+
+PL1. If you build a topology with a parent-locked mux being the child
+     of another mux, this might break a possible assumption from the
+     child mux that the root adapter is unused between its select op
+     and the actual transfer (e.g. if the child mux is auto-closing
+     and the parent mux issues i2c-transfers as part of its select).
+     This is especially the case if the parent mux is mux-locked, but
+     it may also happen if the parent mux is parent-locked.
+
+PL2. If select/deselect calls out to other subsystems such as gpio,
+     pinctrl, regmap or iio, it is essential that any i2c transfers
+     caused by these subsystems are unlocked. This can be convoluted to
+     accomplish, maybe even impossible if an acceptably clean solution
+     is sought.
+
+
+Parent-locked Example
+---------------------
+
+                   .----------.     .--------.
+    .--------.     |  parent- |-----| dev D1 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  .--------.
+                |  '----------'  '--| dev D2 |
+                |  .--------.       '--------'
+                '--| dev D3 |
+                   '--------'
+
+When there is an access to D1, this happens:
+
+ 1. Someone issues an i2c-transfer to D1.
+ 2. M1 locks muxes on its parent (the root adapter in this case).
+ 3. M1 locks its parent adapter.
+ 4. M1 calls ->select to ready the mux.
+ 5. If M1 does any i2c-transfers (on this root adapter) as part of
+    its select, those transfers must be unlocked i2c-transfers so
+    that they do not deadlock the root adapter.
+ 6. M1 feeds the i2c-transfer from step 1 to the root adapter as an
+    unlocked i2c-transfer, so that it does not deadlock the parent
+    adapter.
+ 7. M1 calls ->deselect, if it has one.
+ 8. Same rules as in step 5, but for ->deselect.
+ 9. M1 unlocks its parent adapter.
+10. M1 unlocks muxes on its parent.
+
+
+This means that accesses to both D2 and D3 are locked out for the full
+duration of the entire operation.
+
+
+Complex Examples
+================
+
+Parent-locked mux as parent of parent-locked mux
+------------------------------------------------
+
+This is a useful topology, but it can be bad.
+
+                   .----------.     .----------.     .--------.
+    .--------.     |  parent- |-----|  parent- |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When any device is accessed, all other devices are locked out for
+the full duration of the operation (both muxes lock their parent,
+and specifically when M2 requests its parent to lock, M1 passes
+the buck to the root adapter).
+
+This topology is bad if M2 is an auto-closing mux and M1->select
+issues any unlocked i2c transfers on the root adapter that may leak
+through and be seen by the M2 adapter, thus closing M2 prematurely.
+
+
+Mux-locked mux as parent of mux-locked mux
+------------------------------------------
+
+This is a good topology.
+
+                   .----------.     .----------.     .--------.
+    .--------.     |   mux-   |-----|   mux-   |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When device D1 is accessed, accesses to D2 are locked out for the
+full duration of the operation (muxes on the top child adapter of M1
+are locked). But accesses to D3 and D4 are possibly interleaved at
+any point. Accesses to D3 lock out D1 and D2, but accesses to D4
+are still possibly interleaved.
+
+
+Mux-locked mux as parent of parent-locked mux
+---------------------------------------------
+
+This is probably a bad topology.
+
+                   .----------.     .----------.     .--------.
+    .--------.     |   mux-   |-----|  parent- |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When device D1 is accessed, accesses to D2 and D3 are locked out
+for the full duration of the operation (M1 locks child muxes on the
+root adapter). But accesses to D4 are possibly interleaved at any
+point.
+
+This kind of topology is generally not suitable and should probably
+be avoided. The reason is that M2 probably assumes that there will
+be no i2c transfers during its calls to ->select and ->deselect, and
+if there are, any such transfers might appear on the slave side of M2
+as partial i2c transfers, i.e. garbage or worse. This might cause
+device lockups and/or other problems.
+
+The topology is especially troublesome if M2 is an auto-closing
+mux. In that case, any interleaved accesses to D4 might close M2
+prematurely, as might any i2c-transfers that are part of M1->select.
+
+But if M2 is not making the above stated assumption, and if M2 is not
+auto-closing, the topology is fine.
+
+
+Parent-locked mux as parent of mux-locked mux
+---------------------------------------------
+
+This is a good topology.
+
+                   .----------.     .----------.     .--------.
+    .--------.     |  parent- |-----|   mux-   |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When D1 is accessed, accesses to D2 are locked out for the full
+duration of the operation (muxes on the top child adapter of M1
+are locked). Accesses to D3 and D4 are possibly interleaved at
+any point, just as is expected for mux-locked muxes.
+
+When D3 or D4 are accessed, everything else is locked out. For D3
+accesses, M1 locks the root adapter. For D4 accesses, the root
+adapter is locked directly.
+
+
+Two mux-locked sibling muxes
+----------------------------
+
+This is a good topology.
+
+                                    .--------.
+                   .----------.  .--| dev D1 |
+                   |   mux-   |--'  '--------'
+                .--|  locked  |     .--------.
+                |  |  mux M1  |-----| dev D2 |
+                |  '----------'     '--------'
+                |  .----------.     .--------.
+    .--------.  |  |   mux-   |-----| dev D3 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M2  |--.  .--------.
+                |  '----------'  '--| dev D4 |
+                |  .--------.       '--------'
+                '--| dev D5 |
+                   '--------'
+
+When D1 is accessed, accesses to D2, D3 and D4 are locked out. But
+accesses to D5 may be interleaved at any time.
+
+
+Two parent-locked sibling muxes
+-------------------------------
+
+This is a good topology.
+
+                                    .--------.
+                   .----------.  .--| dev D1 |
+                   |  parent- |--'  '--------'
+                .--|  locked  |     .--------.
+                |  |  mux M1  |-----| dev D2 |
+                |  '----------'     '--------'
+                |  .----------.     .--------.
+    .--------.  |  |  parent- |-----| dev D3 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M2  |--.  .--------.
+                |  '----------'  '--| dev D4 |
+                |  .--------.       '--------'
+                '--| dev D5 |
+                   '--------'
+
+When any device is accessed, accesses to all other devices are locked
+out.
+
+
+Mux-locked and parent-locked sibling muxes
+------------------------------------------
+
+This is a good topology.
+
+                                    .--------.
+                   .----------.  .--| dev D1 |
+                   |   mux-   |--'  '--------'
+                .--|  locked  |     .--------.
+                |  |  mux M1  |-----| dev D2 |
+                |  '----------'     '--------'
+                |  .----------.     .--------.
+    .--------.  |  |  parent- |-----| dev D3 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M2  |--.  .--------.
+                |  '----------'  '--| dev D4 |
+                |  .--------.       '--------'
+                '--| dev D5 |
+                   '--------'
+
+When D1 or D2 are accessed, accesses to D3 and D4 are locked out while
+accesses to D5 may interleave. When D3 or D4 are accessed, accesses to
+all other devices are locked out.
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index f2cfe26..47c1dd9 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -25,7 +25,7 @@
   main interface for a subinterface is in "parent."
 
   Child interface create/delete can also be done using IPoIB's
-  rtnl_link_ops, where childs created using either way behave the same.
+  rtnl_link_ops, where children created using either way behave the same.
 
 Datagram vs Connected modes
 
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index 52ef02b..581c14b 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -290,12 +290,6 @@
   - このプロセスはカーネルが 「準備ができた」と考えられるまで継続しま
     す。このプロセスはだいたい 6週間継続します。
 
-  - 各リリースでの既知の後戻り問題(regression: このリリースの中で新規
-    に作り込まれた問題を指す) はその都度 Linux-kernel メーリングリスト
-    に投稿されます。ゴールとしては、カーネルが 「準備ができた」と宣言
-    する前にこのリストの長さをゼロに減らすことですが、現実には、数個の
-    後戻り問題がリリース時にたびたび残ってしまいます。
-
 Andrew Morton が Linux-kernel メーリングリストにカーネルリリースについ
 て書いたことをここで言っておくことは価値があります-
   「カーネルがいつリリースされるかは誰も知りません。なぜなら、これは現
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt
index fe217c1..1dafc52 100644
--- a/Documentation/kernel-docs.txt
+++ b/Documentation/kernel-docs.txt
@@ -194,15 +194,15 @@
        simple---most of the complexity (other than talking to the
        hardware) involves managing network packets in memory".
        
-     * Title: "Writing Linux Device Drivers"
+     * Title: "Linux Kernel Hackers' Guide"
        Author: Michael K. Johnson.
-       URL: http://users.evitech.fi/~tk/rtos/writing_linux_device_d.html
-       Keywords: files, VFS, file operations, kernel interface, character
-       vs block devices, I/O access, hardware interrupts, DMA, access to
-       user memory, memory allocation, timers.
-       Description: Introductory 50-minutes (sic) tutorial on writing
-       device drivers. 12 pages written by the same author of the "Kernel
-       Hackers' Guide" which give a very good overview of the topic.
+       URL: http://www.tldp.org/LDP/khg/HyperNews/get/khg.html
+       Keywords: device drivers, files, VFS, kernel interface, character vs
+       block devices, hardware interrupts, scsi, DMA, access to user memory,
+       memory allocation, timers.
+       Description: A guide designed to help you get up to speed on the
+       concepts that are not intuitively obvious, and to document the internal
+       structures of Linux.
        
      * Title: "The Venus kernel interface"
        Author: Peter J. Braam.
@@ -250,7 +250,7 @@
 
      * Title: "Analysis of the Ext2fs structure"
        Author: Louis-Dominique Dubeau.
-       URL: http://www.nondot.org/sabre/os/files/FileSystems/ext2fs/
+       URL: http://teaching.csse.uwa.edu.au/units/CITS2002/fs-ext2/
        Keywords: ext2, filesystem, ext2fs.
        Description: Description of ext2's blocks, directories, inodes,
        bitmaps, invariants...
@@ -266,14 +266,14 @@
 
      * Title: "Kernel API changes from 2.0 to 2.2"
        Author: Richard Gooch.
-       URL:
-       http://www.linuxhq.com/guides/LKMPG/node28.html 
+       URL: http://www.safe-mbox.com/~rgooch/linux/docs/porting-to-2.2.html
        Keywords: 2.2, changes.
        Description: Kernel functions/structures/variables which changed
        from 2.0.x to 2.2.x.
 
      * Title: "Kernel API changes from 2.2 to 2.4"
        Author: Richard Gooch.
+       URL: http://www.safe-mbox.com/~rgooch/linux/docs/porting-to-2.4.html
        Keywords: 2.4, changes.
        Description: Kernel functions/structures/variables which changed
        from 2.2.x to 2.4.x.
@@ -609,6 +609,13 @@
        Pages: 432.
        ISBN: 0-201-63338-8
 
+     * Title: "Linux Kernel Development, 3rd Edition"
+       Author: Robert Love
+       Publisher: Addison-Wesley.
+       Date: July, 2010
+       Pages: 440
+       ISBN: 978-0672329463
+
      MISCELLANEOUS:
 
      * Name: linux/Documentation
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2edb27b..18d7f5b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1787,6 +1787,13 @@
 			PCI device 00:14.0 write the parameter as:
 				ivrs_hpet[0]=00:14.0
 
+	ivrs_acpihid	[HW,X86_64]
+			Provide an override to the ACPI-HID:UID<->DEVICE-ID
+			mapping provided in the IVRS ACPI table. For
+			example, to map UART-HID:UID AMD0020:0 to
+			PCI device 00:14.5 write the parameter as:
+				ivrs_acpihid[00:14.5]=AMD0020:0
+
 	js=		[HW,JOY] Analog joystick
 			See Documentation/input/joystick.txt.
 
diff --git a/Documentation/ko_KR/HOWTO b/Documentation/ko_KR/HOWTO
index 5a81b39..9a3e659 100644
--- a/Documentation/ko_KR/HOWTO
+++ b/Documentation/ko_KR/HOWTO
@@ -236,9 +236,9 @@
   - 새로운 커널이 배포되자마자 2주의 시간이 주어진다. 이 기간동은
     메인테이너들은 큰 diff들을 Linus에게 제출할 수 있다. 대개 이 패치들은
     몇 주 동안 -next 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는 데
-    선호되는 방법은  git(커널의 소스 관리 툴, 더 많은 정보들은 http://git.or.cz/
-    에서 참조할 수 있다)를 사용하는 것이지만 순수한 패치파일의 형식으로 보내는
-    것도 무관하다.
+    선호되는 방법은  git(커널의 소스 관리 툴, 더 많은 정보들은
+    http://git-scm.com/ 에서 참조할 수 있다)를 사용하는 것이지만 순수한
+    패치파일의 형식으로 보내는 것도 무관하다.
   - 2주 후에 -rc1 커널이 배포되며 지금부터는 전체 커널의 안정성에 영향을
     미칠수 있는 새로운 기능들을 포함하지 않는 패치들만이 추가될 수 있다.
     완전히 새로운 드라이버(혹은 파일시스템)는 -rc1 이후에만 받아들여진다는
@@ -253,8 +253,6 @@
     것이다.
   - 이러한 프로세스는 커널이 "준비(ready)"되었다고 여겨질때까지 계속된다.
     프로세스는 대체로 6주간 지속된다.
-  - 각 -rc 배포에 있는 알려진 회귀의 목록들은 다음 URI에 남겨진다.
-    http://kernelnewbies.org/known_regressions
 
 커널 배포에 있어서 언급할만한 가치가 있는 리눅스 커널 메일링 리스트의
 Andrew Morton의 글이 있다.
diff --git a/Documentation/laptops/toshiba_haps.txt b/Documentation/laptops/toshiba_haps.txt
index 11dbcfd..0c1d88d 100644
--- a/Documentation/laptops/toshiba_haps.txt
+++ b/Documentation/laptops/toshiba_haps.txt
@@ -19,7 +19,7 @@
 --------------
 
 This driver provides support for the accelerometer found in various Toshiba
-laptops, being called "Toshiba HDD Protection - Shock Sensor" officialy,
+laptops, being called "Toshiba HDD Protection - Shock Sensor" officially,
 and detects laptops automatically with this device.
 On Windows, Toshiba provided software monitors this device and provides
 automatic HDD protection (head unload) on sudden moves or harsh vibrations,
diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
index ea45dd3..285c54f 100644
--- a/Documentation/lzo.txt
+++ b/Documentation/lzo.txt
@@ -69,9 +69,9 @@
 
   IMPORTANT NOTE : in the code some length checks are missing because certain
   instructions are called under the assumption that a certain number of bytes
-  follow because it has already been garanteed before parsing the instructions.
+  follow because it has already been guaranteed before parsing the instructions.
   They just have to "refill" this credit if they consume extra bytes. This is
-  an implementation design choice independant on the algorithm or encoding.
+  an implementation design choice independent of the algorithm or encoding.
 
 Byte sequences
 
diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
index c100c71..3888327 100644
--- a/Documentation/md-cluster.txt
+++ b/Documentation/md-cluster.txt
@@ -316,3 +316,9 @@
  nodes are using the raid which is achieved by lock all bitmap
  locks within the cluster, and also those locks are unlocked
  accordingly.
+
+7. Unsupported features
+
+There are some things which are not supported by cluster MD yet.
+
+- update size and change array_sectors.
diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX
index a9ba672..4623bc0 100644
--- a/Documentation/mmc/00-INDEX
+++ b/Documentation/mmc/00-INDEX
@@ -6,3 +6,5 @@
         - info on SD and MMC device partitions
 mmc-async-req.txt
         - info on mmc asynchronous requests
+mmc-tools.txt
+	- info on mmc-utils tools
diff --git a/Documentation/mmc/mmc-tools.txt b/Documentation/mmc/mmc-tools.txt
new file mode 100644
index 0000000..735509c
--- /dev/null
+++ b/Documentation/mmc/mmc-tools.txt
@@ -0,0 +1,34 @@
+MMC tools introduction
+======================
+
+There is an MMC test tool called mmc-utils, which is maintained by Chris Ball.
+You can find it at the public git repository below:
+http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/
+
+Functions
+=========
+
+The mmc-utils tools can do the following:
+ - Print and parse extcsd data.
+ - Determine the eMMC writeprotect status.
+ - Set the eMMC writeprotect status.
+ - Set the eMMC data sector size to 4KB by disabling emulation.
+ - Create general purpose partition.
+ - Enable the enhanced user area.
+ - Enable write reliability per partition.
+ - Print the response to STATUS_SEND (CMD13).
+ - Enable the boot partition.
+ - Set Boot Bus Conditions.
+ - Enable the eMMC BKOPS feature.
+ - Permanently enable the eMMC H/W Reset feature.
+ - Permanently disable the eMMC H/W Reset feature.
+ - Send Sanitize command.
+ - Program authentication key for the device.
+ - Counter value for the rpmb device will be read to stdout.
+ - Read from rpmb device to output.
+ - Write to rpmb device from data file.
+ - Enable the eMMC cache feature.
+ - Disable the eMMC cache feature.
+ - Print and parse CID data.
+ - Print and parse CSD data.
+ - Print and parse SCR data.
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 6ab619f..d58ff84 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -1256,7 +1256,7 @@
 7. SocketCAN resources
 -----------------------
 
-  The Linux CAN / SocketCAN project ressources (project site / mailing list)
+  The Linux CAN / SocketCAN project resources (project site / mailing list)
   are referenced in the MAINTAINERS file in the Linux source tree.
   Search for CAN NETWORK [LAYERS|DRIVERS].
 
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index b9a4edf..683ada5 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -230,7 +230,7 @@
   mul              0, 4                 A * <x>
   div              0, 4                 A / <x>
   mod              0, 4                 A % <x>
-  neg              0, 4                 !A
+  neg                                   !A
   and              0, 4                 A & <x>
   or               0, 4                 A | <x>
   xor              0, 4                 A ^ <x>
diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt
index 7cb7264..50022b3 100644
--- a/Documentation/pps/pps.txt
+++ b/Documentation/pps/pps.txt
@@ -98,7 +98,7 @@
     };
 
 and then calling the function pps_register_source() in your
-intialization routine as follows:
+initialization routine as follows:
 
     source = pps_register_source(&pps_ktimer_info,
 			PPS_CAPTUREASSERT | PPS_OFFSETASSERT);
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
index af6fce2..61c22d6 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/robust-futexes.txt
@@ -126,9 +126,9 @@
 
  - no VM changes are needed - 'struct address_space' is left alone.
 
- - no registration of individual locks is needed: robust mutexes dont
+ - no registration of individual locks is needed: robust mutexes don't
    need any extra per-lock syscalls. Robust mutexes thus become a very
-   lightweight primitive - so they dont force the application designer
+   lightweight primitive - so they don't force the application designer
    to do a hard choice between performance and robustness - robust
    mutexes are just as fast.
 
@@ -202,7 +202,7 @@
 Testing, architecture support
 -----------------------------
 
-i've tested the new syscalls on x86 and x86_64, and have made sure the
+I've tested the new syscalls on x86 and x86_64, and have made sure the
 parsing of the userspace list is robust [ ;-) ] even if the list is
 deliberately corrupted.
 
diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas
index 18b5709..00ffdf18 100644
--- a/Documentation/scsi/ChangeLog.megaraid_sas
+++ b/Documentation/scsi/ChangeLog.megaraid_sas
@@ -63,7 +63,7 @@
 Current Version : 06.506.00.00-rc1
 Old Version     : 06.504.01.00-rc1
     1. Add 4k FastPath DIF support.
-    2. Dont load DevHandle unless FastPath enabled.
+    2. Don't load DevHandle unless FastPath enabled.
     3. Version and Changelog update.
 -------------------------------------------------------------------------------
 Release Date    : Mon. Oct 1, 2012 17:00:00 PST 2012 -
@@ -105,7 +105,7 @@
     1. Fix reglockFlags for degraded raid5/6 for MR 9360/9380.
     2. Mask off flags in ioctl path to prevent memory scribble with older
        MegaCLI versions.
-    3. Remove poll_mode_io module paramater, sysfs node, and associated code.
+    3. Remove poll_mode_io module parameter, sysfs node, and associated code.
 -------------------------------------------------------------------------------
 Release Date    : Wed. Oct 5, 2011 17:00:00 PST 2010 -
 			(emaild-id:megaraidlinux@lsi.com)
@@ -199,7 +199,7 @@
 1.	Add the Online Controller Reset (OCR) to the Driver.
 	OCR is the new feature for megaraid_sas driver which
 	will allow the fw to do the chip reset which will not
-	affact the OS behavious.
+	affect the OS behavior.
 
 	To add the OCR support, driver need to do:
 		a). reset the controller chips -- Xscale and Gen2 which
@@ -233,7 +233,7 @@
 	failed state.  Driver will kill adapter if can't bring back FW after the
 	this three times reset.
 4.	Add the input parameter max_sectors to 1MB support to our GEN2 controller.
-	customer can use the input paramenter max_sectors to add 1MB support to GEN2
+	customer can use the input parameter max_sectors to add 1MB support to GEN2
 	controller.
 
 1 Release Date    : Thur.  Oct 29, 2009 09:12:45 PST 2009 -
@@ -582,11 +582,11 @@
 
 1 Release Date    : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
 2 Current Version : 00.00.02.04
-3 Older Version   : 00.00.02.04 
+3 Older Version   : 00.00.02.04
 
-i.	Remove superflous instance_lock
+i.	Remove superfluous instance_lock
 
-	gets rid of the otherwise superflous instance_lock and avoids an unsave 
+	gets rid of the otherwise superfluous instance_lock and avoids an unsafe
 	unsynchronized access in the error handler.
 
 		- Christoph Hellwig <hch@lst.de>
@@ -594,43 +594,43 @@
 
 1 Release Date    : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
 2 Current Version : 00.00.02.04
-3 Older Version   : 00.00.02.04 
+3 Older Version   : 00.00.02.04
 
 i.	Support for 1078 type (ppc IOP) controller, device id : 0x60 added.
-	During initialization, depending on the device id, the template members 
-	are initialized with function pointers specific to the ppc or 
-	xscale controllers.  
+	During initialization, depending on the device id, the template members
+	are initialized with function pointers specific to the ppc or
+	xscale controllers.
 
 		-Sumant Patro <Sumant.Patro@lsil.com>
 		
-1 Release Date    : Fri Feb 03 14:16:25 PST 2006 - Sumant Patro 
+1 Release Date    : Fri Feb 03 14:16:25 PST 2006 - Sumant Patro
 							<Sumant.Patro@lsil.com>
 2 Current Version : 00.00.02.04
-3 Older Version   : 00.00.02.02 
-i.	Register 16 byte CDB capability with scsi midlayer 
+3 Older Version   : 00.00.02.02
+i.	Register 16 byte CDB capability with scsi midlayer
 
-	"This patch properly registers the 16 byte command length capability of the 
-	megaraid_sas controlled hardware with the scsi midlayer. All megaraid_sas 
+	"This patch properly registers the 16 byte command length capability of the
+	megaraid_sas controlled hardware with the scsi midlayer. All megaraid_sas
 	hardware supports 16 byte CDB's."
 
-		-Joshua Giles <joshua_giles@dell.com> 
+		-Joshua Giles <joshua_giles@dell.com>
 
 1 Release Date    : Mon Jan 23 14:09:01 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
 2 Current Version : 00.00.02.02
-3 Older Version   : 00.00.02.01 
+3 Older Version   : 00.00.02.01
 
-i.	New template defined to represent each family of controllers (identified by processor used). 
-	The template will have defintions that will be initialised to appropritae values for a specific family of controllers. The template definition has four function pointers. During driver initialisation the function pointers will be set based on the controller family type. This change is done to support new controllers that has different processors and thus different register set.
+i.	New template defined to represent each family of controllers (identified by processor used).
+	The template will have definitions that will be initialised to appropriate values for a specific family of controllers. The template definition has four function pointers. During driver initialisation the function pointers will be set based on the controller family type. This change is done to support new controllers that have different processors and thus different register sets.
 
 		-Sumant Patro <Sumant.Patro@lsil.com>
 
 1 Release Date    : Mon Dec 19 14:36:26 PST 2005 - Sumant Patro <Sumant.Patro@lsil.com>
-2 Current Version : 00.00.02.00-rc4 
-3 Older Version   : 00.00.02.01 
+2 Current Version : 00.00.02.00-rc4
+3 Older Version   : 00.00.02.01
 
-i.	Code reorganized to remove code duplication in megasas_build_cmd. 
+i.	Code reorganized to remove code duplication in megasas_build_cmd.
 
-	"There's a lot of duplicate code megasas_build_cmd.  Move that out of the different codepathes and merge the reminder of megasas_build_cmd into megasas_queue_command"
+	"There's a lot of duplicate code in megasas_build_cmd.  Move that out of the different codepaths and merge the remainder of megasas_build_cmd into megasas_queue_command"
 
 		- Christoph Hellwig <hch@lst.de>
 
diff --git a/Documentation/scsi/bfa.txt b/Documentation/scsi/bfa.txt
index f2d6e9d..3cc4d80 100644
--- a/Documentation/scsi/bfa.txt
+++ b/Documentation/scsi/bfa.txt
@@ -50,7 +50,7 @@
 
 http://www.brocade.com/services-support/drivers-downloads/adapters/Linux.page
 
-and then click following respective util pacakge link
+and then click following respective util package link
 
 	Version			Link
 
diff --git a/Documentation/security/LoadPin.txt b/Documentation/security/LoadPin.txt
new file mode 100644
index 0000000..e11877f
--- /dev/null
+++ b/Documentation/security/LoadPin.txt
@@ -0,0 +1,17 @@
+LoadPin is a Linux Security Module that ensures all kernel-loaded files
+(modules, firmware, etc) originate from the same filesystem, with
+the expectation that such a filesystem is backed by a read-only device
+such as dm-verity or CDROM. This allows systems that have a verified
+and/or unchangeable filesystem to enforce module and firmware loading
+restrictions without needing to sign the files individually.
+
+The LSM is selectable at build-time with CONFIG_SECURITY_LOADPIN, and
+can be controlled at boot-time with the kernel command line option
+"loadpin.enabled". By default, it is enabled, but can be disabled at
+boot ("loadpin.enabled=0").
+
+LoadPin starts pinning when it sees the first file loaded. If the
+block device backing the filesystem is not read-only, a sysctl is
+created to toggle pinning: /proc/sys/kernel/loadpin/enabled. (Having
+a mutable filesystem means pinning is mutable too, but having the
+sysctl allows for easy testing on systems with a mutable filesystem.)
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index 8c18387..20d0571 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -823,6 +823,36 @@
      A process must have search permission on the key for this function to be
      successful.
 
+ (*) Compute a Diffie-Hellman shared secret or public key
+
+       long keyctl(KEYCTL_DH_COMPUTE, struct keyctl_dh_params *params,
+		   char *buffer, size_t buflen);
+
+     The params struct contains serial numbers for three keys:
+
+	 - The prime, p, known to both parties
+	 - The local private key
+	 - The base integer, which is either a shared generator or the
+	   remote public key
+
+     The value computed is:
+
+	result = base ^ private (mod prime)
+
+     If the base is the shared generator, the result is the local
+     public key.  If the base is the remote public key, the result is
+     the shared secret.
+
+     The buffer length must be at least the length of the prime, or zero.
+
+     If the buffer length is nonzero, the length of the result is
+     returned when it is successfully calculated and copied in to the
+     buffer. When the buffer length is zero, the minimum required
+     buffer length is returned.
+
+     This function will return error EOPNOTSUPP if the key type is not
+     supported, error ENOKEY if the key could not be found, or error
+     EACCES if the key is not readable by the caller.
 
 ===============
 KERNEL SERVICES
@@ -999,6 +1029,10 @@
 	struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
 				  const struct cred *cred,
 				  key_perm_t perm,
+				  int (*restrict_link)(struct key *,
+						       const struct key_type *,
+						       unsigned long,
+						       const union key_payload *),
 				  unsigned long flags,
 				  struct key *dest);
 
@@ -1010,6 +1044,24 @@
     KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted
     towards the user's quota).  Error ENOMEM can also be returned.
 
+    If restrict_link is not NULL, it should point to a function that will be
+    called each time an attempt is made to link a key into the new keyring.
+    This function is called to check whether a key may be added into the keyring
+    or not.  Callers of key_create_or_update() within the kernel can pass
+    KEY_ALLOC_BYPASS_RESTRICTION to suppress the check.  An example of using
+    this is to manage rings of cryptographic keys that are set up when the
+    kernel boots where userspace is also permitted to add keys - provided they
+    can be verified by a key the kernel already has.
+
+    When called, the restriction function will be passed the keyring being
+    added to, the key flags value and the type and payload of the key being
+    added.  Note that when a new key is being created, this is called between
+    payload preparsing and actual key creation.  The function should return 0
+    to allow the link or an error to reject it.
+
+    A convenience function, restrict_link_reject, exists to always return
+    -EPERM in this case.
+
 
 (*) To check the validity of a key, this function can be called:
 
diff --git a/Documentation/security/self-protection.txt b/Documentation/security/self-protection.txt
new file mode 100644
index 0000000..babd637
--- /dev/null
+++ b/Documentation/security/self-protection.txt
@@ -0,0 +1,261 @@
+# Kernel Self-Protection
+
+Kernel self-protection is the design and implementation of systems and
+structures within the Linux kernel to protect against security flaws in
+the kernel itself. This covers a wide range of issues, including removing
+entire classes of bugs, blocking security flaw exploitation methods,
+and actively detecting attack attempts. Not all topics are explored in
+this document, but it should serve as a reasonable starting point and
+answer any frequently asked questions. (Patches welcome, of course!)
+
+In the worst-case scenario, we assume an unprivileged local attacker
+has arbitrary read and write access to the kernel's memory. In many
+cases, bugs being exploited will not provide this level of access,
+but with systems in place that defend against the worst case we'll
+cover the more limited cases as well. A higher bar, and one that should
+still be kept in mind, is protecting the kernel against a _privileged_
+local attacker, since the root user has access to a vastly increased
+attack surface. (Especially when they have the ability to load arbitrary
+kernel modules.)
+
+The goals for successful self-protection systems would be that they
+are effective, on by default, require no opt-in by developers, have no
+performance impact, do not impede kernel debugging, and have tests. It
+is uncommon that all these goals can be met, but it is worth explicitly
+mentioning them, since these aspects need to be explored, dealt with,
+and/or accepted.
+
+
+## Attack Surface Reduction
+
+The most fundamental defense against security exploits is to reduce the
+areas of the kernel that can be used to redirect execution. This ranges
+from limiting the exposed APIs available to userspace, making in-kernel
+APIs hard to use incorrectly, minimizing the areas of writable kernel
+memory, etc.
+
+### Strict kernel memory permissions
+
+When all of kernel memory is writable, it becomes trivial for attacks
+to redirect execution flow. To reduce the availability of these targets
+the kernel needs to protect its memory with a tight set of permissions.
+
+#### Executable code and read-only data must not be writable
+
+Any areas of the kernel with executable memory must not be writable.
+While this obviously includes the kernel text itself, we must consider
+all additional places too: kernel modules, JIT memory, etc. (There are
+temporary exceptions to this rule to support things like instruction
+alternatives, breakpoints, kprobes, etc. If these must exist in a
+kernel, they are implemented in a way where the memory is temporarily
+made writable during the update, and then returned to the original
+permissions.)
+
+In support of this are (the poorly named) CONFIG_DEBUG_RODATA and
+CONFIG_DEBUG_SET_MODULE_RONX, which seek to make sure that code is not
+writable, data is not executable, and read-only data is neither writable
+nor executable.
+
+#### Function pointers and sensitive variables must not be writable
+
+Vast areas of kernel memory contain function pointers that are looked
+up by the kernel and used to continue execution (e.g. descriptor/vector
+tables, file/network/etc operation structures, etc). The number of these
+variables must be reduced to an absolute minimum.
+
+Many such variables can be made read-only by setting them "const"
+so that they live in the .rodata section instead of the .data section
+of the kernel, gaining the protection of the kernel's strict memory
+permissions as described above.
+
+For variables that are initialized once at __init time, these can
+be marked with the (new and under development) __ro_after_init
+attribute.
+
+What remains are variables that are updated rarely (e.g. GDT). These
+will need another infrastructure (similar to the temporary exceptions
+made to kernel code mentioned above) that allows them to spend the rest
+of their lifetime read-only. (For example, when being updated, only the
+CPU thread performing the update would be given uninterruptible write
+access to the memory.)
+
+#### Segregation of kernel memory from userspace memory
+
+The kernel must never execute userspace memory. The kernel must also never
+access userspace memory without explicit expectation to do so. These
+rules can be enforced either by support of hardware-based restrictions
+(x86's SMEP/SMAP, ARM's PXN/PAN) or via emulation (ARM's Memory Domains).
+By blocking userspace memory in this way, execution and data parsing
+cannot be passed to trivially-controlled userspace memory, forcing
+attacks to operate entirely in kernel memory.
+
+### Reduced access to syscalls
+
+One trivial way to eliminate many syscalls for 64-bit systems is building
+without CONFIG_COMPAT. However, this is rarely a feasible scenario.
+
+The "seccomp" system provides an opt-in feature made available to
+userspace, which provides a way to reduce the number of kernel entry
+points available to a running process. This limits the breadth of kernel
+code that can be reached, possibly reducing the availability of a given
+bug to an attack.
+
+An area of improvement would be creating viable ways to keep access to
+things like compat, user namespaces, BPF creation, and perf limited only
+to trusted processes. This would keep the scope of kernel entry points
+restricted to the more regular set normally available to unprivileged
+userspace.
+
+### Restricting access to kernel modules
+
+The kernel should never allow an unprivileged user the ability to
+load specific kernel modules, since that would provide a facility to
+unexpectedly extend the available attack surface. (The on-demand loading
+of modules via their predefined subsystems, e.g. MODULE_ALIAS_*, is
+considered "expected" here, though additional consideration should be
+given even to these.) For example, loading a filesystem module via an
+unprivileged socket API is nonsense: only the root or physically local
+user should trigger filesystem module loading. (And even this can be up
+for debate in some scenarios.)
+
+To protect against even privileged users, systems may need to either
+disable module loading entirely (e.g. monolithic kernel builds or
+modules_disabled sysctl), or provide signed modules (e.g.
+CONFIG_MODULE_SIG_FORCE, or dm-crypt with LoadPin), to keep from having
+root load arbitrary kernel code via the module loader interface.
+
+
+## Memory integrity
+
+There are many memory structures in the kernel that are regularly abused
+to gain execution control during an attack. By far the most commonly
+understood is that of the stack buffer overflow in which the return
+address stored on the stack is overwritten. Many other examples of this
+kind of attack exist, and protections exist to defend against them.
+
+### Stack buffer overflow
+
+The classic stack buffer overflow involves writing past the expected end
+of a variable stored on the stack, ultimately writing a controlled value
+to the stack frame's stored return address. The most widely used defense
+is the presence of a stack canary between the stack variables and the
+return address (CONFIG_CC_STACKPROTECTOR), which is verified just before
+the function returns. Other defenses include things like shadow stacks.
+
+### Stack depth overflow
+
+A less well understood attack is using a bug that triggers the
+kernel to consume stack memory with deep function calls or large stack
+allocations. With this attack it is possible to write beyond the end of
+the kernel's preallocated stack space and into sensitive structures. Two
+important changes need to be made for better protections: moving the
+sensitive thread_info structure elsewhere, and adding a faulting memory
+hole at the bottom of the stack to catch these overflows.
+
+### Heap memory integrity
+
+The structures used to track heap free lists can be sanity-checked during
+allocation and freeing to make sure they aren't being used to manipulate
+other memory areas.
+
+### Counter integrity
+
+Many places in the kernel use atomic counters to track object references
+or perform similar lifetime management. When these counters can be made
+to wrap (over or under) this traditionally exposes a use-after-free
+flaw. By trapping atomic wrapping, this class of bug vanishes.
+
+### Size calculation overflow detection
+
+Similar to counter overflow, integer overflows (usually size calculations)
+need to be detected at runtime to kill this class of bug, which
+traditionally leads to being able to write past the end of kernel buffers.
+
+
+## Statistical defenses
+
+While many protections can be considered deterministic (e.g. read-only
+memory cannot be written to), some protections provide only statistical
+defense, in that an attack must gather enough information about a
+running system to overcome the defense. While not perfect, these do
+provide meaningful defenses.
+
+### Canaries, blinding, and other secrets
+
+It should be noted that things like the stack canary discussed earlier
+are technically statistical defenses, since they rely on a (leakable)
+secret value.
+
+Blinding literal values for things like JITs, where the executable
+contents may be partially under the control of userspace, need a similar
+secret value.
+
+It is critical that the secret values used must be separate (e.g.
+different canary per stack) and high entropy (e.g. is the RNG actually
+working?) in order to maximize their success.
+
+### Kernel Address Space Layout Randomization (KASLR)
+
+Since the location of kernel memory is almost always instrumental in
+mounting a successful attack, making the location non-deterministic
+raises the difficulty of an exploit. (Note that this in turn makes
+the value of leaks higher, since they may be used to discover desired
+memory locations.)
+
+#### Text and module base
+
+By relocating the physical and virtual base address of the kernel at
+boot-time (CONFIG_RANDOMIZE_BASE), attacks needing kernel code will be
+frustrated. Additionally, offsetting the module loading base address
+means that even systems that load the same set of modules in the same
+order every boot will not share a common base address with the rest of
+the kernel text.
+
+#### Stack base
+
+If the base address of the kernel stack is not the same between processes,
+or even not the same between syscalls, targets on or beyond the stack
+become more difficult to locate.
+
+#### Dynamic memory base
+
+Much of the kernel's dynamic memory (e.g. kmalloc, vmalloc, etc) ends up
+being relatively deterministic in layout due to the order of early-boot
+initializations. If the base address of these areas is not the same
+between boots, targeting them is frustrated, requiring a leak specific
+to the region.
+
+
+## Preventing Leaks
+
+Since the locations of sensitive structures are the primary target for
+attacks, it is important to defend against leaks of both kernel memory
+addresses and kernel memory contents (since they may contain kernel
+addresses or other sensitive things like canary values).
+
+### Unique identifiers
+
+Kernel memory addresses must never be used as identifiers exposed to
+userspace. Instead, use an atomic counter, an idr, or similar unique
+identifier.
+
+### Memory initialization
+
+Memory copied to userspace must always be fully initialized. If not
+explicitly memset(), this will require changes to the compiler to make
+sure structure holes are cleared.
+
+### Memory poisoning
+
+When releasing memory, it is best to poison the contents (clear stack on
+syscall return, wipe heap memory on a free), to avoid reuse attacks that
+rely on the old contents of memory. This frustrates many uninitialized
+variable attacks, stack info leaks, heap info leaks, and use-after-free
+attacks.
+
+### Destination tracking
+
+To help kill classes of bugs that result in kernel addresses being
+written to userspace, the destination of writes needs to be tracked. If
+the buffer is destined for userspace (e.g. seq_file backed /proc files),
+it should automatically censor sensitive values.
diff --git a/Documentation/serial/driver b/Documentation/serial/driver
index 379468e..da193e0 100644
--- a/Documentation/serial/driver
+++ b/Documentation/serial/driver
@@ -28,7 +28,7 @@
 the correct port structure (via uart_get_console) and decoding command line
 arguments (uart_parse_options).
 
-There is also a helper function (uart_write_console) which performs a
+There is also a helper function (uart_console_write) which performs a
 character by character write, translating newlines to CRLF sequences.
 Driver writers are recommended to use this function rather than implementing
 their own version.
@@ -41,27 +41,23 @@
 necessary locking using port->lock.  There are some exceptions (which
 are described in the uart_ops listing below.)
 
-There are three locks.  A per-port spinlock, a per-port tmpbuf semaphore,
-and an overall semaphore.
+There are two locks.  A per-port spinlock, and an overall semaphore.
 
 From the core driver perspective, the port->lock locks the following
 data:
 
 	port->mctrl
 	port->icount
-	info->xmit.head (circ->head)
-	info->xmit.tail (circ->tail)
+	port->state->xmit.head (circ_buf->head)
+	port->state->xmit.tail (circ_buf->tail)
 
 The low level driver is free to use this lock to provide any additional
 locking.
 
-The core driver uses the info->tmpbuf_sem lock to prevent multi-threaded
-access to the info->tmpbuf bouncebuffer used for port writes.
-
 The port_sem semaphore is used to protect against ports being added/
 removed or reconfigured at inappropriate times. Since v2.6.27, this
 semaphore has been the 'mutex' member of the tty_port struct, and
-commonly referred to as the port mutex (or port->mutex).
+commonly referred to as the port mutex.
 
 
 uart_ops
@@ -135,6 +131,24 @@
 	Interrupts: locally disabled.
 	This call must not sleep
 
+  throttle(port)
+	Notify the serial driver that input buffers for the line discipline are
+	close to full, and it should somehow signal that no more characters
+	should be sent to the serial port.
+	This will be called only if hardware assisted flow control is enabled.
+
+	Locking: serialized with .unthrottle() and termios modification by the
+		 tty layer.
+
+  unthrottle(port)
+	Notify the serial driver that characters can now be sent to the serial
+	port without fear of overrunning the input buffers of the line
+	disciplines.
+	This will be called only if hardware assisted flow control is enabled.
+
+	Locking: serialized with .throttle() and termios modification by the
+		 tty layer.
+
   send_xchar(port,ch)
 	Transmit a high priority character, even if the port is stopped.
 	This is used to implement XON/XOFF flow control and tcflow().  If
@@ -172,9 +186,7 @@
 	should be terminated when another call is made with a zero
 	ctl.
 
-	Locking: none.
-	Interrupts: caller dependent.
-	This call must not sleep
+	Locking: caller holds tty_port->mutex
 
   startup(port)
 	Grab any interrupt resources and initialise any low level driver
@@ -192,7 +204,7 @@
 	RTS nor DTR; this will have already been done via a separate
 	call to set_mctrl.
 
-	Drivers must not access port->info once this call has completed.
+	Drivers must not access port->state once this call has completed.
 
 	This method will only be called when there are no more users of
 	this port.
@@ -204,7 +216,7 @@
 	Flush any write buffers, reset any DMA state and stop any
 	ongoing DMA transfers.
 
-	This will be called whenever the port->info->xmit circular
+	This will be called whenever the port->state->xmit circular
 	buffer is cleared.
 
 	Locking: port->lock taken.
@@ -250,10 +262,15 @@
 	Other flags may be used (eg, xon/xoff characters) if your
 	hardware supports hardware "soft" flow control.
 
-	Locking: caller holds port->mutex
+	Locking: caller holds tty_port->mutex
 	Interrupts: caller dependent.
 	This call must not sleep
 
+  set_ldisc(port,termios)
+	Notifier for discipline change. See Documentation/serial/tty.txt.
+
+	Locking: caller holds tty_port->mutex
+
   pm(port,state,oldstate)
 	Perform any power management related activities on the specified
 	port.  State indicates the new state (defined by
@@ -371,7 +388,7 @@
 	Interrupts: n/a
 
 uart_get_divisor(port,baud)
-	Return the divsor (baud_base / baud) for the specified baud
+	Return the divisor (baud_base / baud) for the specified baud
 	rate, appropriately rounded.
 
 	If 38400 baud and custom divisor is selected, return the
@@ -449,11 +466,12 @@
 
 mctrl_gpio_free(dev, gpios):
 	This will free the requested gpios in mctrl_gpio_init().
-	As devm_* function are used, there's generally no need to call
+	As devm_* functions are used, there's generally no need to call
 	this function.
 
 mctrl_gpio_to_gpiod(gpios, gidx)
-	This returns the gpio structure associated to the modem line index.
+	This returns the gpio_desc structure associated to the modem line
+	index.
 
 mctrl_gpio_set(gpios, mctrl):
 	This will sets the gpios according to the mctrl state.
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
index e7193aa..d4510eb 100644
--- a/Documentation/sound/alsa/HD-Audio.txt
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -655,17 +655,6 @@
 and next kernels are found in for-linus and for-next branches,
 respectively.
 
-If you are using the latest Linus tree, it'd be better to pull the
-above GIT tree onto it.  If you are using the older kernels, an easy
-way to try the latest ALSA code is to build from the snapshot
-tarball.  There are daily tarballs and the latest snapshot tarball.
-All can be built just like normal alsa-driver release packages, that
-is, installed via the usual spells: configure, make and make
-install(-modules).  See INSTALL in the package.  The snapshot tarballs
-are found at:
-
-- ftp://ftp.suse.com/pub/people/tiwai/snapshot/
-
 
 Sending a Bug Report
 ~~~~~~~~~~~~~~~~~~~~
@@ -699,7 +688,12 @@
 alsa-info
 ~~~~~~~~~
 The script `alsa-info.sh` is a very useful tool to gather the audio
-device information.  You can fetch the latest version from:
+device information.  It's included in the alsa-utils package.  The latest
+version can be found in the git repository:
+
+- git://git.alsa-project.org/alsa-utils.git
+
+The script can be fetched directly from the following URL, too:
 
 - http://www.alsa-project.org/alsa-info.sh
 
@@ -836,15 +830,11 @@
 (mixer) elements, set/get the control element value, simulate the PCM
 operation, the jack plugging simulation, etc.
 
-The package is found in:
-
-- ftp://ftp.suse.com/pub/people/tiwai/misc/
-
-A git repository is available:
+The program is found in the git repository below:
 
 - git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/hda-emu.git
 
-See README file in the tarball for more details about hda-emu
+See README file in the repository for more details about hda-emu
 program.
 
 
diff --git a/Documentation/sound/alsa/compress_offload.txt b/Documentation/sound/alsa/compress_offload.txt
index 630c492..8ba556a 100644
--- a/Documentation/sound/alsa/compress_offload.txt
+++ b/Documentation/sound/alsa/compress_offload.txt
@@ -149,7 +149,7 @@
 ================
 When playing thru an album, the decoders have the ability to skip the encoder
 delay and padding and directly move from one track content to another. The end
-user can perceive this as gapless playback as we dont have silence while
+user can perceive this as gapless playback as we don't have silence while
 switching from one track to another
 
 Also, there might be low-intensity noises due to encoding. Perfect gapless is
@@ -184,7 +184,7 @@
 - Fill data of the first track
 - Trigger start
 - User-space finished sending all,
-- Indicaite next track data by sending set_next_track
+- Indicate next track data by sending set_next_track
 - Set metadata of the next track
 - then call partial_drain to flush most of buffer in DSP
 - Fill data of the next track
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
index 6faab48..c45bd79 100644
--- a/Documentation/sound/alsa/soc/dapm.txt
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -132,7 +132,7 @@
 SND_SOC_DAPM_MIXER("Output Mixer", WM8731_PWR, 4, 1, wm8731_output_mixer_controls,
 	ARRAY_SIZE(wm8731_output_mixer_controls)),
 
-If you dont want the mixer elements prefixed with the name of the mixer widget,
+If you don't want the mixer elements prefixed with the name of the mixer widget,
 you can use SND_SOC_DAPM_MIXER_NAMED_CTL instead. the parameters are the same
 as for SND_SOC_DAPM_MIXER.
 
diff --git a/Documentation/sound/alsa/soc/overview.txt b/Documentation/sound/alsa/soc/overview.txt
index ff88f52..f3f28b7 100644
--- a/Documentation/sound/alsa/soc/overview.txt
+++ b/Documentation/sound/alsa/soc/overview.txt
@@ -63,7 +63,7 @@
     and any audio DSP drivers for that platform.
 
   * Machine class driver: The machine driver class acts as the glue that
-    decribes and binds the other component drivers together to form an ALSA
+    describes and binds the other component drivers together to form an ALSA
     "sound card device". It handles any machine specific controls and
     machine level audio events (e.g. turning on an amp at start of playback).
 
diff --git a/Documentation/sound/alsa/timestamping.txt b/Documentation/sound/alsa/timestamping.txt
index 0b191a2..1b6473f 100644
--- a/Documentation/sound/alsa/timestamping.txt
+++ b/Documentation/sound/alsa/timestamping.txt
@@ -129,7 +129,7 @@
 interpolation of the results
 
 In some hardware-specific configuration, the system timestamp is
-latched by a low-level audio subsytem, and the information provided
+latched by a low-level audio subsystem, and the information provided
 back to the driver. Due to potential delays in the communication with
 the hardware, there is a risk of misalignment with the avail and delay
 information. To make sure applications are not confused, a
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 13f5619..3a3b30a 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -212,7 +212,7 @@
 overwritten since you registered it.
 
 The Magic SysRQ system works by registering key operations against a key op
-lookup table, which is defined in 'drivers/char/sysrq.c'. This key table has
+lookup table, which is defined in 'drivers/tty/sysrq.c'. This key table has
 a number of operations registered into it at compile time, but is mutable,
 and 2 functions are exported for interface to it:
 	register_sysrq_key and unregister_sysrq_key.
diff --git a/Documentation/timers/hrtimers.txt b/Documentation/timers/hrtimers.txt
index ce31f65..588d857 100644
--- a/Documentation/timers/hrtimers.txt
+++ b/Documentation/timers/hrtimers.txt
@@ -28,9 +28,9 @@
 
 - the unpredictable [O(N)] overhead of cascading leads to delays which
   necessitate a more complex handling of high resolution timers, which
-  in turn decreases robustness. Such a design still led to rather large
+  in turn decreases robustness. Such a design still leads to rather large
   timing inaccuracies. Cascading is a fundamental property of the timer
-  wheel concept, it cannot be 'designed out' without unevitably
+  wheel concept, it cannot be 'designed out' without inevitably
   degrading other portions of the timers.c code in an unacceptable way.
 
 - the implementation of the current posix-timer subsystem on top of
@@ -119,7 +119,7 @@
 hrtimer functions now have clearer behavior and clearer names - such as
 hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly
 equivalent to del_timer() and del_timer_sync()] - so there's no direct
-1:1 mapping between them on the algorithmical level, and thus no real
+1:1 mapping between them on the algorithmic level, and thus no real
 potential for code sharing either.
 
 Basic data types: every time value, absolute or relative, is in a
diff --git a/Documentation/video4linux/README.cx88 b/Documentation/video4linux/README.cx88
index 35fae23..b09ce36 100644
--- a/Documentation/video4linux/README.cx88
+++ b/Documentation/video4linux/README.cx88
@@ -50,7 +50,7 @@
      cx88-cards.c.  If that worked, mail me your changes as unified
      diff ("diff -u").
  (3) Or you can mail me the config information.  I need at least the
-     following informations to add the card:
+     following information to add the card:
 
      * the PCI Subsystem ID ("0070:3400" from the line above,
        "lspci -v" output is fine too).
diff --git a/Documentation/video4linux/bttv/Sound-FAQ b/Documentation/video4linux/bttv/Sound-FAQ
index d3f1d77..646a47d 100644
--- a/Documentation/video4linux/bttv/Sound-FAQ
+++ b/Documentation/video4linux/bttv/Sound-FAQ
@@ -55,7 +55,7 @@
 to connect the mux chip.
 
 As mentioned above, there is a array which holds the required
-informations for each known board.  You basically have to create a new
+information for each known board.  You basically have to create a new
 line for your board.  The important fields are these two:
 
 struct tvcard
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4d0542c..a4482cc 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -199,8 +199,8 @@
 Parameters: vcpu id (apic id on x86)
 Returns: vcpu fd on success, -1 on error
 
-This API adds a vcpu to a virtual machine.  The vcpu id is a small integer
-in the range [0, max_vcpus).
+This API adds a vcpu to a virtual machine. No more than max_vcpus may be added.
+The vcpu id is an integer in the range [0, max_vcpu_id).
 
 The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
 the KVM_CHECK_EXTENSION ioctl() at run-time.
@@ -212,6 +212,12 @@
 If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
 same as the value returned from KVM_CAP_NR_VCPUS.
 
+The maximum possible value for max_vcpu_id can be retrieved using the
+KVM_CAP_MAX_VCPU_ID of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+If the KVM_CAP_MAX_VCPU_ID does not exist, you should assume that max_vcpu_id
+is the same as the value returned from KVM_CAP_MAX_VCPUS.
+
 On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
 threads in one or more virtual CPU cores.  (This is because the
 hardware requires all the hardware threads in a CPU core to be in the
@@ -3788,6 +3794,14 @@
 Fails if VCPU has already been created, or if the irqchip is already in the
 kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
 
+7.6 KVM_CAP_S390_RI
+
+Architectures: s390
+Parameters: none
+
+Allows use of runtime-instrumentation introduced with zEC12 processor.
+Will return -EINVAL if the machine does not support runtime-instrumentation.
+Will return -EBUSY if a VCPU has already been created.
 
 8. Other capabilities.
 ----------------------
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index e3e314c..6b0e115 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -11,6 +11,7 @@
 - add interrupts (KVM_DEV_FLIC_ENQUEUE)
 - inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
 - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
+- purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ)
 - enable/disable for the guest transparent async page faults
 - register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
 
@@ -40,6 +41,11 @@
     Simply deletes all elements from the list of currently pending floating
     interrupts.  No interrupts are injected into the guest.
 
+  KVM_DEV_FLIC_CLEAR_IO_IRQ
+    Deletes one (if any) I/O interrupt for a subchannel identified by the
+    subsystem identification word passed via the buffer specified by
+    attr->addr (address) and attr->attr (length).
+
   KVM_DEV_FLIC_APF_ENABLE
     Enables async page faults for the guest. So in case of a major page fault
     the host is allowed to handle this async and continues the guest.
@@ -68,7 +74,7 @@
 
   KVM_DEV_FLIC_ADAPTER_MODIFY
     Modifies attributes of an existing I/O adapter interrupt source. Takes
-    a kvm_s390_io_adapter_req specifiying the adapter and the operation:
+    a kvm_s390_io_adapter_req specifying the adapter and the operation:
 
 struct kvm_s390_io_adapter_req {
 	__u32 id;
@@ -94,3 +100,9 @@
     KVM_S390_IO_ADAPTER_UNMAP
       release a userspace page for the translated address specified in addr
       from the list of mappings
+
+Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
+FLIC with an unknown group or attribute give the error code EINVAL (instead of
+ENXIO, as specified in the API documentation). It is not possible to conclude
+that a FLIC operation is unavailable based on the error code resulting from a
+usage attempt.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 54dd9b9..59cbc80 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -220,7 +220,7 @@
    node list of "all" with numactl --interleave or --membind [-m] to achieve
    interleaving over all nodes in the system or cpuset.
 
-4) Any task mempolicy specifed--e.g., using numactl--will be constrained by
+4) Any task mempolicy specified--e.g., using numactl--will be constrained by
    the resource limits of any cpuset in which the task runs.  Thus, there will
    be no way for a task with non-default policy running in a cpuset with a
    subset of the system nodes to allocate huge pages outside the cpuset
@@ -275,10 +275,10 @@
 options sets the owner and group of the root of the file system.  By default
 the uid and gid of the current process are taken.  The mode option sets the
 mode of root of file system to value & 01777.  This value is given in octal.
-By default the value 0755 is picked. If the paltform supports multiple huge
+By default the value 0755 is picked. If the platform supports multiple huge
 page sizes, the pagesize option can be used to specify the huge page size and
 associated pool.  pagesize is specified in bytes.  If pagesize is not specified
-the paltform's default huge page size and associated pool will be used. The
+the platform's default huge page size and associated pool will be used. The
 size option sets the maximum value of memory (huge pages) allowed for that
 filesystem (/mnt/huge).  The size option can be specified in bytes, or as a
 percentage of the specified huge page pool (nr_hugepages).  The size is
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index 0e1e555..eafcefa 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -62,7 +62,7 @@
     14. SWAPBACKED
     15. COMPOUND_HEAD
     16. COMPOUND_TAIL
-    16. HUGE
+    17. HUGE
     18. UNEVICTABLE
     19. HWPOISON
     20. NOPAGE
diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
index 818518a..1a5a121 100644
--- a/Documentation/x86/intel_mpx.txt
+++ b/Documentation/x86/intel_mpx.txt
@@ -136,7 +136,7 @@
    If we were to preallocate them for the 128TB of user virtual address
    space, we would need to reserve 512TB+2GB, which is larger than the
    entire virtual address space today. This means they can not be reserved
-   ahead of time. Also, a single process's pre-popualated bounds directory
+   ahead of time. Also, a single process's pre-populated bounds directory
    consumes 2GB of virtual *AND* physical memory. IOW, it's completely
    infeasible to prepopulate bounds directories.
 
@@ -151,7 +151,7 @@
    these calls.
 
 Q: Could a bounds fault be handed to userspace and the tables allocated
-   there in a signal handler intead of in the kernel?
+   there in a signal handler instead of in the kernel?
 A: mmap() is not on the list of safe async handler functions and even
    if mmap() would work it still requires locking or nasty tricks to
    keep track of the allocation state there.
diff --git a/Documentation/xillybus.txt b/Documentation/xillybus.txt
index 81d111b..1660145 100644
--- a/Documentation/xillybus.txt
+++ b/Documentation/xillybus.txt
@@ -215,7 +215,7 @@
   choice is a non-zero value, to match standard UNIX behavior.
 
 * synchronous: A non-zero value means that the pipe is synchronous. See
-  Syncronization above.
+  Synchronization above.
 
 * bufsize: Each DMA buffer's size. Always a power of two.
 
diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO
index 54ea24f..f0613b9 100644
--- a/Documentation/zh_CN/HOWTO
+++ b/Documentation/zh_CN/HOWTO
@@ -207,7 +207,7 @@
   - 每当一个新版本的内核被发布,为期两周的集成窗口将被打开。在这段时间里
     维护者可以向Linus提交大段的修改,通常这些修改已经被放到-mm内核中几个
     星期了。提交大量修改的首选方式是使用git工具(内核的代码版本管理工具
-    ,更多的信息可以在http://git.or.cz/获取),不过使用普通补丁也是可以
+    ,更多的信息可以在http://git-scm.com/获取),不过使用普通补丁也是可以
     的。
   - 两个星期以后-rc1版本内核发布。之后只有不包含可能影响整个内核稳定性的
     新功能的补丁才可能被接受。请注意一个全新的驱动程序(或者文件系统)有
@@ -218,8 +218,6 @@
     时,一个新的-rc版本就会被发布。计划是每周都发布新的-rc版本。
   - 这个过程一直持续下去直到内核被认为达到足够稳定的状态,持续时间大概是
     6个星期。
-  - 以下地址跟踪了在每个-rc发布中发现的退步列表:
-    http://kernelnewbies.org/known_regressions
 
 关于内核发布,值得一提的是Andrew Morton在linux-kernel邮件列表中如是说:
 	“没有人知道新内核何时会被发布,因为发布是根据已知bug的情况来决定
diff --git a/Documentation/zh_CN/arm64/booting.txt b/Documentation/zh_CN/arm64/booting.txt
index 1145bf8..c1dd968 100644
--- a/Documentation/zh_CN/arm64/booting.txt
+++ b/Documentation/zh_CN/arm64/booting.txt
@@ -8,7 +8,7 @@
 
 M:	Will Deacon <will.deacon@arm.com>
 zh_CN:	Fu Wei <wefu@redhat.com>
-C:	1926e54f115725a9248d0c4c65c22acaf94de4c4
+C:	55f058e7574c3615dea4615573a19bdb258696c6
 ---------------------------------------------------------------------
 Documentation/arm64/booting.txt 的中文翻译
 
@@ -20,7 +20,7 @@
 中文版维护者: 傅炜  Fu Wei <wefu@redhat.com>
 中文版翻译者: 傅炜  Fu Wei <wefu@redhat.com>
 中文版校译者: 傅炜  Fu Wei <wefu@redhat.com>
-本文翻译提交时的 Git 检出点为: 1926e54f115725a9248d0c4c65c22acaf94de4c4
+本文翻译提交时的 Git 检出点为: 55f058e7574c3615dea4615573a19bdb258696c6
 
 以下为正文
 ---------------------------------------------------------------------
@@ -125,18 +125,22 @@
 			1 - 4K
 			2 - 16K
 			3 - 64K
-  位 3-63:	保留。
+  位 3:		内核物理位置
+			0 - 2MB 对齐基址应尽量靠近内存起始处,因为
+			    其基址以下的内存无法通过线性映射访问
+			1 - 2MB 对齐基址可以在物理内存的任意位置
+  位 4-63:	保留。
 
 - 当 image_size 为零时,引导装载程序应试图在内核映像末尾之后尽可能
   多地保留空闲内存供内核直接使用。对内存空间的需求量因所选定的内核
   特性而异, 并无实际限制。
 
-内核映像必须被放置在靠近可用系统内存起始的 2MB 对齐为基址的
-text_offset 字节处,并从该处被调用。当前,对 Linux 来说在此基址以下的
-内存是无法使用的,因此强烈建议将系统内存的起始作为这个基址。2MB 对齐
-基址和内核映像起始地址之间的区域对于内核来说没有特殊意义,且可能被
-用于其他目的。
+内核映像必须被放置在任意一个可用系统内存 2MB 对齐基址的 text_offset
+字节处,并从该处被调用。2MB 对齐基址和内核映像起始地址之间的区域对于
+内核来说没有特殊意义,且可能被用于其他目的。
 从映像起始地址算起,最少必须准备 image_size 字节的空闲内存供内核使用。
+注: v4.6 之前的版本无法使用内核映像物理偏移以下的内存,所以当时建议
+将映像尽量放置在靠近系统内存起始的地方。
 
 任何提供给内核的内存(甚至在映像起始地址之前),若未从内核中标记为保留
 (如在设备树(dtb)的 memreserve 区域),都将被认为对内核是可用。
diff --git a/MAINTAINERS b/MAINTAINERS
index add406a..65f3277a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4444,6 +4444,12 @@
 F:	drivers/video/fbdev/exynos/exynos_mipi*
 F:	include/video/exynos_mipi*
 
+EZchip NPS platform support
+M:	Noam Camus <noamc@ezchip.com>
+S:	Supported
+F:	arch/arc/plat-eznps
+F:	arch/arc/boot/dts/eznps.dts
+
 F71805F HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-hwmon@vger.kernel.org
@@ -4726,6 +4732,7 @@
 M:	Timur Tabi <timur@tabi.org>
 M:	Nicolin Chen <nicoleotsuka@gmail.com>
 M:	Xiubo Li <Xiubo.Lee@gmail.com>
+R:	Fabio Estevam <fabio.estevam@nxp.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
@@ -5341,6 +5348,7 @@
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
+F:	Documentation/i2c/i2c-topology
 F:	Documentation/i2c/muxes/
 F:	Documentation/devicetree/bindings/i2c/i2c-mux*
 F:	drivers/i2c/i2c-mux.c
@@ -6485,7 +6493,7 @@
 F:	include/net/l3mdev.h
 
 LANTIQ MIPS ARCHITECTURE
-M:	John Crispin <blogic@openwrt.org>
+M:	John Crispin <john@phrozen.org>
 L:	linux-mips@linux-mips.org
 S:	Maintained
 F:	arch/mips/lantiq
@@ -7326,6 +7334,15 @@
 F:	Documentation/mips/
 F:	arch/mips/
 
+MIPS/LOONGSON1 ARCHITECTURE
+M:	Keguang Zhang <keguang.zhang@gmail.com>
+L:	linux-mips@linux-mips.org
+S:	Maintained
+F:	arch/mips/loongson32/
+F:	arch/mips/include/asm/mach-loongson32/
+F:	drivers/*/*loongson1*
+F:	drivers/*/*/*loongson1*
+
 MIROSOUND PCM20 FM RADIO RECEIVER DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
@@ -9245,7 +9262,7 @@
 F:	drivers/video/fbdev/aty/aty128fb.c
 
 RALINK MIPS ARCHITECTURE
-M:	John Crispin <blogic@openwrt.org>
+M:	John Crispin <john@phrozen.org>
 L:	linux-mips@linux-mips.org
 S:	Maintained
 F:	arch/mips/ralink
@@ -10025,6 +10042,12 @@
 S:	Supported
 F:	security/apparmor/
 
+LOADPIN SECURITY MODULE
+M:	Kees Cook <keescook@chromium.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git lsm/loadpin
+S:	Supported
+F:	security/loadpin/
+
 YAMA SECURITY MODULE
 M:	Kees Cook <keescook@chromium.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip
@@ -10996,10 +11019,11 @@
 S:	Supported
 F:	drivers/clk/tegra/
 
-TEGRA DMA DRIVER
+TEGRA DMA DRIVERS
 M:	Laxman Dewangan <ldewangan@nvidia.com>
+M:	Jon Hunter <jonathanh@nvidia.com>
 S:	Supported
-F:	drivers/dma/tegra20-apb-dma.c
+F:	drivers/dma/tegra*
 
 TEGRA I2C DRIVER
 M:	Laxman Dewangan <ldewangan@nvidia.com>
diff --git a/README b/README
index afc4f0d..e8c8a6d 100644
--- a/README
+++ b/README
@@ -2,7 +2,7 @@
 
 These are the release notes for Linux version 4.  Read them carefully,
 as they tell you what this is all about, explain how to install the
-kernel, and what to do if something goes wrong. 
+kernel, and what to do if something goes wrong.
 
 WHAT IS LINUX?
 
@@ -16,7 +16,7 @@
   and multistack networking including IPv4 and IPv6.
 
   It is distributed under the GNU General Public License - see the
-  accompanying COPYING file for more details. 
+  accompanying COPYING file for more details.
 
 ON WHAT HARDWARE DOES IT RUN?
 
@@ -44,7 +44,7 @@
    system: there are much better sources available.
 
  - There are various README files in the Documentation/ subdirectory:
-   these typically contain kernel-specific installation notes for some 
+   these typically contain kernel-specific installation notes for some
    drivers for example. See Documentation/00-INDEX for a list of what
    is contained in each file.  Please read the Changes file, as it
    contains information about the problems, which may result by upgrading
@@ -276,7 +276,7 @@
    To have the build system also tell the reason for the rebuild of each
    target, use "V=2".  The default is "V=0".
 
- - Keep a backup kernel handy in case something goes wrong.  This is 
+ - Keep a backup kernel handy in case something goes wrong.  This is
    especially true for the development releases, since each new release
    contains new code which has not been debugged.  Make sure you keep a
    backup of the modules corresponding to that kernel, as well.  If you
@@ -290,7 +290,7 @@
 
  - In order to boot your new kernel, you'll need to copy the kernel
    image (e.g. .../linux/arch/i386/boot/bzImage after compilation)
-   to the place where your regular bootable kernel is found. 
+   to the place where your regular bootable kernel is found.
 
  - Booting a kernel directly from a floppy without the assistance of a
    bootloader such as LILO, is no longer supported.
@@ -303,10 +303,10 @@
    to update the loading map! If you don't, you won't be able to boot
    the new kernel image.
 
-   Reinstalling LILO is usually a matter of running /sbin/lilo. 
+   Reinstalling LILO is usually a matter of running /sbin/lilo.
    You may wish to edit /etc/lilo.conf to specify an entry for your
    old kernel image (say, /vmlinux.old) in case the new one does not
-   work.  See the LILO docs for more information. 
+   work.  See the LILO docs for more information.
 
    After reinstalling LILO, you should be all set.  Shutdown the system,
    reboot, and enjoy!
@@ -314,9 +314,9 @@
    If you ever need to change the default root device, video mode,
    ramdisk size, etc.  in the kernel image, use the 'rdev' program (or
    alternatively the LILO boot options when appropriate).  No need to
-   recompile the kernel to change these parameters. 
+   recompile the kernel to change these parameters.
 
- - Reboot with the new kernel and enjoy. 
+ - Reboot with the new kernel and enjoy.
 
 IF SOMETHING GOES WRONG:
 
@@ -383,7 +383,7 @@
    is followed by a function with a higher address you will find the one
    you want.  In fact, it may be a good idea to include a bit of
    "context" in your problem report, giving a few lines around the
-   interesting one. 
+   interesting one.
 
    If you for some reason cannot do the above (you have a pre-compiled
    kernel image or similar), telling me as much about your setup as
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 99e8d47..92c0d46 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -77,10 +77,10 @@
 	if (i >= PCI_ROM_RESOURCE)
 		return -ENODEV;
 
-	if (!__pci_mmap_fits(pdev, i, vma, sparse))
+	if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
 		return -EINVAL;
 
-	if (iomem_is_exclusive(res->start))
+	if (!__pci_mmap_fits(pdev, i, vma, sparse))
 		return -EINVAL;
 
 	pcibios_resource_to_bus(pdev->bus, &bar, res);
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a876743..8894f7e 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -10,8 +10,9 @@
 	def_bool y
 	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
 	select BUILDTIME_EXTABLE_SORT
-	select COMMON_CLK
+	select CLKSRC_OF
 	select CLONE_BACKWARDS
+	select COMMON_CLK
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_FIND_FIRST_BIT
@@ -30,6 +31,7 @@
 	select HAVE_MOD_ARCH_SPECIFIC if ARC_DW2_UNWIND
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
+	select HANDLE_DOMAIN_IRQ
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
@@ -95,6 +97,7 @@
 source "arch/arc/plat-tb10x/Kconfig"
 source "arch/arc/plat-axs10x/Kconfig"
 #New platform adds here
+source "arch/arc/plat-eznps/Kconfig"
 
 endmenu
 
@@ -490,6 +493,17 @@
 config ARC_PLAT_NEEDS_PHYS_TO_DMA
 	bool
 
+config ARC_KVADDR_SIZE
+	int "Kernel Virtaul Address Space size (MB)"
+	range 0 512
+	default "256"
+	help
+	  The kernel address space is carved out of 256MB of translated address
+	  space for catering to vmalloc, modules, pkmap, fixmap. This however may
+	  not suffice vmalloc requirements of a 4K CPU EZChip system. So allow
+	  this to be stretched to 512 MB (by extending into the reserved
+	  kernel-user gutter)
+
 config ARC_CURR_IN_REG
 	bool "Dedicate Register r25 for current_task pointer"
 	default y
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index def69e3..02fabef 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -115,6 +115,11 @@
 core-$(CONFIG_ARC_PLAT_SIM)	+= arch/arc/plat-sim/
 core-$(CONFIG_ARC_PLAT_TB10X)	+= arch/arc/plat-tb10x/
 core-$(CONFIG_ARC_PLAT_AXS10X)	+= arch/arc/plat-axs10x/
+core-$(CONFIG_ARC_PLAT_EZNPS)	+= arch/arc/plat-eznps/
+
+ifdef CONFIG_ARC_PLAT_EZNPS
+KBUILD_CPPFLAGS += -I$(srctree)/arch/arc/plat-eznps/include
+endif
 
 drivers-$(CONFIG_OPROFILE)	+= arch/arc/oprofile/
 
diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi
index cfb5052..de53f5c 100644
--- a/arch/arc/boot/dts/abilis_tb10x.dtsi
+++ b/arch/arc/boot/dts/abilis_tb10x.dtsi
@@ -35,6 +35,20 @@
 		};
 	};
 
+	/* TIMER0 with interrupt for clockevent */
+	timer0 {
+		compatible = "snps,arc-timer";
+		interrupts = <3>;
+		interrupt-parent = <&intc>;
+		clocks = <&cpu_clk>;
+	};
+
+	/* TIMER1 for free running clocksource */
+	timer1 {
+		compatible = "snps,arc-timer";
+		clocks = <&cpu_clk>;
+	};
+
 	soc100 {
 		#address-cells	= <1>;
 		#size-cells	= <1>;
@@ -112,7 +126,7 @@
 			chan_allocation_order = <0>;
 			chan_priority = <1>;
 			block_size = <0x7ff>;
-			data_width = <2>;
+			data-width = <4>;
 			clocks = <&ahb_clk>;
 			clock-names = "hclk";
 		};
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 420dcfd..40bcecf 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -11,6 +11,8 @@
  * Note that this file only supports the 770D CPU
  */
 
+/include/ "skeleton.dtsi"
+
 / {
 	compatible = "snps,arc";
 	clock-frequency = <750000000>;	/* 750 MHZ */
@@ -24,7 +26,13 @@
 
 		ranges = <0x00000000 0xf0000000 0x10000000>;
 
-		cpu_intc: arc700-intc@cpu {
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <750000000>;
+		};
+
+		core_intc: arc700-intc@cpu {
 			compatible = "snps,arc700-intc";
 			interrupt-controller;
 			#interrupt-cells = <1>;
@@ -48,7 +56,7 @@
 				reg = <0>;
 				interrupt-controller;
 				#interrupt-cells = <2>;
-				interrupt-parent = <&cpu_intc>;
+				interrupt-parent = <&core_intc>;
 				interrupts = <15>;
 			};
 		};
@@ -86,7 +94,7 @@
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0xe0012000 0x200 >;
 		interrupt-controller;
-		interrupt-parent = <&cpu_intc>;
+		interrupt-parent = <&core_intc>;
 		interrupts = < 7 >;
 	};
 
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index f90fadf..cabe0de 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -10,6 +10,8 @@
  * Device tree for AXC003 CPU card: HS38x UP configuration
  */
 
+/include/ "skeleton_hs.dtsi"
+
 / {
 	compatible = "snps,arc";
 	clock-frequency = <90000000>;
@@ -23,7 +25,13 @@
 
 		ranges = <0x00000000 0xf0000000 0x10000000>;
 
-		cpu_intc: archs-intc@cpu {
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <90000000>;
+		};
+
+		core_intc: archs-intc@cpu {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
 			#interrupt-cells = <1>;
@@ -47,7 +55,7 @@
 				reg = <0>;
 				interrupt-controller;
 				#interrupt-cells = <2>;
-				interrupt-parent = <&cpu_intc>;
+				interrupt-parent = <&core_intc>;
 				interrupts = <25>;
 			};
 		};
@@ -66,7 +74,7 @@
 		arcpct0: pct {
 			compatible = "snps,archs-pct";
 			#interrupt-cells = <1>;
-			interrupt-parent = <&cpu_intc>;
+			interrupt-parent = <&core_intc>;
 			interrupts = <20>;
 		};
 	};
@@ -89,7 +97,7 @@
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0xe0012000 0x200 >;
 		interrupt-controller;
-		interrupt-parent = <&cpu_intc>;
+		interrupt-parent = <&core_intc>;
 		interrupts = < 24 >;
 	};
 
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 06a9f29..ed1674b 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -10,6 +10,8 @@
  * Device tree for AXC003 CPU card: HS38x2 (Dual Core) with IDU intc
  */
 
+/include/ "skeleton_hs_idu.dtsi"
+
 / {
 	compatible = "snps,arc";
 	clock-frequency = <90000000>;
@@ -23,7 +25,13 @@
 
 		ranges = <0x00000000 0xf0000000 0x10000000>;
 
-		cpu_intc: archs-intc@cpu {
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <100000000>;
+		};
+
+		core_intc: archs-intc@cpu {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
 			#interrupt-cells = <1>;
@@ -32,7 +40,7 @@
 		idu_intc: idu-interrupt-controller {
 			compatible = "snps,archs-idu-intc";
 			interrupt-controller;
-			interrupt-parent = <&cpu_intc>;
+			interrupt-parent = <&core_intc>;
 
 			/*
 			 * <hwirq  distribution>
@@ -89,7 +97,7 @@
 		arcpct0: pct {
 			compatible = "snps,archs-pct";
 			#interrupt-cells = <1>;
-			interrupt-parent = <&cpu_intc>;
+			interrupt-parent = <&core_intc>;
 			interrupts = <20>;
 		};
 	};
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index 44a578c..68c84a2 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -16,7 +16,20 @@
 		ranges = <0x00000000 0xe0000000 0x10000000>;
 		interrupt-parent = <&mb_intc>;
 
+		i2sclk: i2sclk@100a0 {
+			compatible = "snps,axs10x-i2s-pll-clock";
+			reg = <0x100a0 0x10>;
+			clocks = <&i2spll_clk>;
+			#clock-cells = <0>;
+		};
+
 		clocks {
+			i2spll_clk: i2spll_clk {
+				compatible = "fixed-clock";
+				clock-frequency = <27000000>;
+				#clock-cells = <0>;
+			};
+
 			i2cclk: i2cclk {
 				compatible = "fixed-clock";
 				clock-frequency = <50000000>;
diff --git a/arch/arc/boot/dts/eznps.dts b/arch/arc/boot/dts/eznps.dts
new file mode 100644
index 0000000..b89f6c3
--- /dev/null
+++ b/arch/arc/boot/dts/eznps.dts
@@ -0,0 +1,96 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "ezchip,arc-nps";
+	clock-frequency = <83333333>;	/* 83.333333 MHZ */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&intc>;
+	present-cpus = "0-1,16-17";
+	possible-cpus = "0-4095";
+
+	aliases {
+		ethernet0 = &gmac0;
+	};
+
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32be,0xf7209000,115200n8 console=ttyS0,115200n8";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x20000000>;	/* 512M */
+	};
+
+	clocks {
+		sysclk: sysclk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <83333333>;
+		};
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		/* child and parent address space 1:1 mapped */
+		ranges;
+
+		intc: interrupt-controller {
+			compatible = "ezchip,nps400-ic";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		timer0: timer_clkevt {
+			compatible = "snps,arc-timer";
+			interrupts = <3>;
+			clocks = <&sysclk>;
+		};
+
+		timer1: timer_clksrc {
+			compatible = "ezchip,nps400-timer";
+			clocks = <&sysclk>;
+			clock-names="sysclk";
+		};
+
+		uart@f7209000 {
+			compatible = "snps,dw-apb-uart";
+			device_type = "serial";
+			reg = <0xf7209000 0x100>;
+			interrupts = <6>;
+			clocks = <&sysclk>;
+			clock-names="baudclk";
+			baud = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			native-endian;
+		};
+
+		gmac0: ethernet@f7470000 {
+			compatible = "ezchip,nps-mgt-enet";
+			reg = <0xf7470000 0x1940>;
+			interrupts = <7>;
+			/* Filled in by U-Boot */
+			mac-address = [ 00 C0 00 F0 04 03 ];
+		};
+	};
+};
diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts
index 105a001..5d5e373 100644
--- a/arch/arc/boot/dts/nsim_700.dts
+++ b/arch/arc/boot/dts/nsim_700.dts
@@ -14,7 +14,7 @@
 	clock-frequency = <80000000>;	/* 80 MHZ */
 	#address-cells = <1>;
 	#size-cells = <1>;
-	interrupt-parent = <&intc>;
+	interrupt-parent = <&core_intc>;
 
 	chosen {
 		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
@@ -32,7 +32,13 @@
 		/* child and parent address space 1:1 mapped */
 		ranges;
 
-		intc: interrupt-controller {
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <80000000>;
+		};
+
+		core_intc: interrupt-controller {
 			compatible = "snps,arc700-intc";
 			interrupt-controller;
 			#interrupt-cells = <1>;
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index f46633e..bf05fe5 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -7,7 +7,7 @@
  */
 /dts-v1/;
 
-/include/ "skeleton.dtsi"
+/include/ "skeleton_hs.dtsi"
 
 / {
 	compatible = "snps,nsim_hs";
@@ -39,6 +39,12 @@
 			 bus addr,   parent bus addr, size */
 		ranges = <0x80000000 0x0 0x80000000 0x80000000>;
 
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <80000000>;
+		};
+
 		core_intc: core-interrupt-controller {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts
index 46ab319..99eabe1 100644
--- a/arch/arc/boot/dts/nsim_hs_idu.dts
+++ b/arch/arc/boot/dts/nsim_hs_idu.dts
@@ -7,7 +7,7 @@
  */
 /dts-v1/;
 
-/include/ "skeleton.dtsi"
+/include/ "skeleton_hs_idu.dtsi"
 
 / {
 	compatible = "snps,nsim_hs";
@@ -29,6 +29,12 @@
 		/* child and parent address space 1:1 mapped */
 		ranges;
 
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <80000000>;
+		};
+
 		core_intc: core-interrupt-controller {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index d94b4ce..b5b060a 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -14,7 +14,7 @@
 	clock-frequency = <20000000>;	/* 20 MHZ */
 	#address-cells = <1>;
 	#size-cells = <1>;
-	interrupt-parent = <&intc>;
+	interrupt-parent = <&core_intc>;
 
 	chosen {
 		/* this is for console on PGU */
@@ -35,7 +35,13 @@
 		/* child and parent address space 1:1 mapped */
 		ranges;
 
-		intc: interrupt-controller {
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <20000000>;
+		};
+
+		core_intc: interrupt-controller {
 			compatible = "snps,arc700-intc";
 			interrupt-controller;
 			#interrupt-cells = <1>;
diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts
index 034a313..325e730 100644
--- a/arch/arc/boot/dts/nsimosci_hs.dts
+++ b/arch/arc/boot/dts/nsimosci_hs.dts
@@ -7,7 +7,7 @@
  */
 /dts-v1/;
 
-/include/ "skeleton.dtsi"
+/include/ "skeleton_hs.dtsi"
 
 / {
 	compatible = "snps,nsimosci_hs";
@@ -35,6 +35,12 @@
 		/* child and parent address space 1:1 mapped */
 		ranges;
 
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <20000000>;
+		};
+
 		core_intc: core-interrupt-controller {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts
index 8a1297e..ee03d71 100644
--- a/arch/arc/boot/dts/nsimosci_hs_idu.dts
+++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts
@@ -7,7 +7,7 @@
  */
 /dts-v1/;
 
-/include/ "skeleton.dtsi"
+/include/ "skeleton_hs_idu.dtsi"
 
 / {
 	compatible = "snps,nsimosci_hs";
@@ -33,6 +33,12 @@
 		/* child and parent address space 1:1 mapped */
 		ranges;
 
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <5000000>;
+		};
+
 		core_intc: core-interrupt-controller {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi
index 296d371..3a10cc6 100644
--- a/arch/arc/boot/dts/skeleton.dtsi
+++ b/arch/arc/boot/dts/skeleton.dtsi
@@ -30,6 +30,20 @@
 		};
 	};
 
+	/* TIMER0 with interrupt for clockevent */
+	timer0 {
+		compatible = "snps,arc-timer";
+		interrupts = <3>;
+		interrupt-parent = <&core_intc>;
+		clocks = <&core_clk>;
+	};
+
+	/* TIMER1 for free running clocksource */
+	timer1 {
+		compatible = "snps,arc-timer";
+		clocks = <&core_clk>;
+	};
+
 	memory {
 		device_type = "memory";
 		reg = <0x80000000 0x10000000>;	/* 256M */
diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi
new file mode 100644
index 0000000..71fd308
--- /dev/null
+++ b/arch/arc/boot/dts/skeleton_hs.dtsi
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+	compatible = "snps,arc";
+	clock-frequency = <80000000>;	/* 80 MHZ */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	chosen { };
+	aliases { };
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "snps,archs38";
+			reg = <0>;
+		};
+	};
+
+	/* TIMER0 with interrupt for clockevent */
+	timer0 {
+		compatible = "snps,arc-timer";
+		interrupts = <16>;
+		interrupt-parent = <&core_intc>;
+		clocks = <&core_clk>;
+	};
+
+	/* 64-bit Local RTC: preferred clocksource for UP */
+	rtc {
+		compatible = "snps,archs-timer-rtc";
+		clocks = <&core_clk>;
+	};
+
+	/* TIMER1 for free running clocksource: Fallback if rtc not found */
+	timer1 {
+		compatible = "snps,arc-timer";
+		clocks = <&core_clk>;
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x10000000>;	/* 256M */
+	};
+};
diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi
new file mode 100644
index 0000000..d1cb25a
--- /dev/null
+++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+	compatible = "snps,arc";
+	clock-frequency = <80000000>;	/* 80 MHZ */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	chosen { };
+	aliases { };
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "snps,archs38xN";
+			reg = <0>;
+		};
+	};
+
+	/* TIMER0 with interrupt for clockevent */
+	timer0 {
+		compatible = "snps,arc-timer";
+		interrupts = <16>;
+		interrupt-parent = <&core_intc>;
+		clocks = <&core_clk>;
+	};
+
+	/* 64-bit Global Free Running Counter */
+	gfrc {
+		compatible = "snps,archs-timer-gfrc";
+		clocks = <&core_clk>;
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x10000000>;	/* 256M */
+	};
+};
diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi
index 84226bd..ad4ee43 100644
--- a/arch/arc/boot/dts/vdk_axc003.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003.dtsi
@@ -10,6 +10,8 @@
  * Device tree for AXC003 CPU card: HS38x UP configuration (VDK version)
  */
 
+/include/ "skeleton_hs.dtsi"
+
 / {
 	compatible = "snps,arc";
 	clock-frequency = <50000000>;
@@ -23,7 +25,13 @@
 
 		ranges = <0x00000000 0xf0000000 0x10000000>;
 
-		cpu_intc: archs-intc@cpu {
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <50000000>;
+		};
+
+		core_intc: archs-intc@cpu {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
 			#interrupt-cells = <1>;
@@ -33,7 +41,7 @@
 			compatible = "snps,dw-apb-uart";
 			reg = <0x5000 0x100>;
 			clock-frequency = <2403200>;
-			interrupt-parent = <&cpu_intc>;
+			interrupt-parent = <&core_intc>;
 			interrupts = <19>;
 			baud = <115200>;
 			reg-shift = <2>;
@@ -47,7 +55,7 @@
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0xe0012000 0x200 >;
 		interrupt-controller;
-		interrupt-parent = <&cpu_intc>;
+		interrupt-parent = <&core_intc>;
 		interrupts = < 18 >;
 	};
 
diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
index 31f0fb5..a3cb626 100644
--- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
@@ -11,6 +11,8 @@
  * HS38x2 (Dual Core) with IDU intc (VDK version)
  */
 
+/include/ "skeleton_hs_idu.dtsi"
+
 / {
 	compatible = "snps,arc";
 	clock-frequency = <50000000>;
@@ -24,7 +26,13 @@
 
 		ranges = <0x00000000 0xf0000000 0x10000000>;
 
-		cpu_intc: archs-intc@cpu {
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <50000000>;
+		};
+
+		core_intc: archs-intc@cpu {
 			compatible = "snps,archs-intc";
 			interrupt-controller;
 			#interrupt-cells = <1>;
@@ -33,7 +41,7 @@
 		idu_intc: idu-interrupt-controller {
 			compatible = "snps,archs-idu-intc";
 			interrupt-controller;
-			interrupt-parent = <&cpu_intc>;
+			interrupt-parent = <&core_intc>;
 
 			/*
 			 * <hwirq  distribution>
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
new file mode 100644
index 0000000..ede625c
--- /dev/null
+++ b/arch/arc/configs/nps_defconfig
@@ -0,0 +1,84 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_EZNPS=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4096
+CONFIG_ARC_CACHE_LINE_SHIFT=5
+# CONFIG_ARC_CACHE_PAGES is not set
+# CONFIG_ARC_HAS_LLSC is not set
+CONFIG_ARC_KVADDR_SIZE=402
+CONFIG_ARC_EMUL_UNALIGNED=y
+CONFIG_ARC_UBOOT_SUPPORT=y
+CONFIG_PREEMPT=y
+CONFIG_NET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=2048
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_ENABLE_DEFAULT_TRACERS=y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 7730d30..5f3dcbb 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -17,6 +17,8 @@
 #include <asm/barrier.h>
 #include <asm/smp.h>
 
+#ifndef CONFIG_ARC_PLAT_EZNPS
+
 #define atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
@@ -180,13 +182,88 @@
 ATOMIC_OP(or, |=, or)
 ATOMIC_OP(xor, ^=, xor)
 
-#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
 #undef SCOND_FAIL_RETRY_VAR_DEF
 #undef SCOND_FAIL_RETRY_ASM
 #undef SCOND_FAIL_RETRY_VARS
 
+#else /* CONFIG_ARC_PLAT_EZNPS */
+
+static inline int atomic_read(const atomic_t *v)
+{
+	int temp;
+
+	__asm__ __volatile__(
+	"	ld.di %0, [%1]"
+	: "=r"(temp)
+	: "r"(&v->counter)
+	: "memory");
+	return temp;
+}
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+	__asm__ __volatile__(
+	"	st.di %0,[%1]"
+	:
+	: "r"(i), "r"(&v->counter)
+	: "memory");
+}
+
+#define ATOMIC_OP(op, c_op, asm_op)					\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	__asm__ __volatile__(						\
+	"	mov r2, %0\n"						\
+	"	mov r3, %1\n"						\
+	"       .word %2\n"						\
+	:								\
+	: "r"(i), "r"(&v->counter), "i"(asm_op)				\
+	: "r2", "r3", "memory");					\
+}									\
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+static inline int atomic_##op##_return(int i, atomic_t *v)		\
+{									\
+	unsigned int temp = i;						\
+									\
+	/* Explicit full memory barrier needed before/after */		\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"	mov r2, %0\n"						\
+	"	mov r3, %1\n"						\
+	"       .word %2\n"						\
+	"	mov %0, r2"						\
+	: "+r"(temp)							\
+	: "r"(&v->counter), "i"(asm_op)					\
+	: "r2", "r3", "memory");					\
+									\
+	smp_mb();							\
+									\
+	temp c_op i;							\
+									\
+	return temp;							\
+}
+
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_OP_RETURN(op, c_op, asm_op)
+
+ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
+#define atomic_sub(i, v) atomic_add(-(i), (v))
+#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
+
+ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
+#define atomic_andnot(mask, v) atomic_and(~(mask), (v))
+ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
+ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
+
+#endif /* CONFIG_ARC_PLAT_EZNPS */
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
 /**
  * __atomic_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
index a720998..b1e3274 100644
--- a/arch/arc/include/asm/barrier.h
+++ b/arch/arc/include/asm/barrier.h
@@ -30,9 +30,7 @@
 #define rmb()	asm volatile("dmb 1\n" : : : "memory")
 #define wmb()	asm volatile("dmb 2\n" : : : "memory")
 
-#endif
-
-#ifdef CONFIG_ISA_ARCOMPACT
+#elif !defined(CONFIG_ARC_PLAT_EZNPS)  /* CONFIG_ISA_ARCOMPACT */
 
 /*
  * ARCompact based cores (ARC700) only have SYNC instruction which is super
@@ -41,6 +39,14 @@
  */
 
 #define mb()	asm volatile("sync\n" : : : "memory")
+
+#else	/* CONFIG_ARC_PLAT_EZNPS */
+
+#include <plat/ctop.h>
+
+#define mb()	asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
+#define rmb()	asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RD) : "memory")
+
 #endif
 
 #include <asm-generic/barrier.h>
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 0352fb8..8da87fe 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -22,7 +22,7 @@
 #include <asm/smp.h>
 #endif
 
-#if defined(CONFIG_ARC_HAS_LLSC)
+#ifdef CONFIG_ARC_HAS_LLSC
 
 /*
  * Hardware assisted Atomic-R-M-W
@@ -88,7 +88,7 @@
 	return (old & (1 << nr)) != 0;					\
 }
 
-#else	/* !CONFIG_ARC_HAS_LLSC */
+#elif !defined(CONFIG_ARC_PLAT_EZNPS)
 
 /*
  * Non hardware assisted Atomic-R-M-W
@@ -139,7 +139,55 @@
 	return (old & (1UL << (nr & 0x1f))) != 0;			\
 }
 
-#endif /* CONFIG_ARC_HAS_LLSC */
+#else /* CONFIG_ARC_PLAT_EZNPS */
+
+#define BIT_OP(op, c_op, asm_op)					\
+static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
+{									\
+	m += nr >> 5;							\
+									\
+	nr = (1UL << (nr & 0x1f));					\
+	if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3)			\
+		nr = ~nr;						\
+									\
+	__asm__ __volatile__(						\
+	"	mov r2, %0\n"						\
+	"	mov r3, %1\n"						\
+	"	.word %2\n"						\
+	:								\
+	: "r"(nr), "r"(m), "i"(asm_op)					\
+	: "r2", "r3", "memory");					\
+}
+
+#define TEST_N_BIT_OP(op, c_op, asm_op)					\
+static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{									\
+	unsigned long old;						\
+									\
+	m += nr >> 5;							\
+									\
+	nr = old = (1UL << (nr & 0x1f));				\
+	if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3)			\
+		old = ~old;						\
+									\
+	/* Explicit full memory barrier needed before/after */		\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"	mov r2, %0\n"						\
+	"	mov r3, %1\n"						\
+	"       .word %2\n"						\
+	"	mov %0, r2"						\
+	: "+r"(old)							\
+	: "r"(m), "i"(asm_op)						\
+	: "r2", "r3", "memory");					\
+									\
+	smp_mb();							\
+									\
+	return (old & nr) != 0;					\
+}
+
+#endif /* CONFIG_ARC_PLAT_EZNPS */
 
 /***************************************
  * Non atomic variants
@@ -181,9 +229,15 @@
 	/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
 	__TEST_N_BIT_OP(op, c_op, asm_op)
 
+#ifndef CONFIG_ARC_PLAT_EZNPS
 BIT_OPS(set, |, bset)
 BIT_OPS(clear, & ~, bclr)
 BIT_OPS(change, ^, bxor)
+#else
+BIT_OPS(set, |, CTOP_INST_AOR_DI_R2_R2_R3)
+BIT_OPS(clear, & ~, CTOP_INST_AAND_DI_R2_R2_R3)
+BIT_OPS(change, ^, CTOP_INST_AXOR_DI_R2_R2_R3)
+#endif
 
 /*
  * This routine doesn't need to be atomic.
diff --git a/arch/arc/include/asm/clk.h b/arch/arc/include/asm/clk.h
deleted file mode 100644
index bf9d29f..0000000
--- a/arch/arc/include/asm/clk.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARC_CLK_H
-#define _ASM_ARC_CLK_H
-
-/* Although we can't really hide core_freq, the accessor is still better way */
-extern unsigned long core_freq;
-
-static inline unsigned long arc_get_core_freq(void)
-{
-	return core_freq;
-}
-
-extern int arc_set_core_freq(unsigned long);
-
-#endif
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index a444be6..d819de1 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -44,7 +44,7 @@
 	return prev;
 }
 
-#else
+#elif !defined(CONFIG_ARC_PLAT_EZNPS)
 
 static inline unsigned long
 __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
@@ -64,23 +64,48 @@
 	return prev;
 }
 
+#else /* CONFIG_ARC_PLAT_EZNPS */
+
+static inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
+{
+	/*
+	 * Explicit full memory barrier needed before/after
+	 */
+	smp_mb();
+
+	write_aux_reg(CTOP_AUX_GPA1, expected);
+
+	__asm__ __volatile__(
+	"	mov r2, %0\n"
+	"	mov r3, %1\n"
+	"	.word %2\n"
+	"	mov %0, r2"
+	: "+r"(new)
+	: "r"(ptr), "i"(CTOP_INST_EXC_DI_R2_R2_R3)
+	: "r2", "r3", "memory");
+
+	smp_mb();
+
+	return new;
+}
+
 #endif /* CONFIG_ARC_HAS_LLSC */
 
 #define cmpxchg(ptr, o, n) ((typeof(*(ptr)))__cmpxchg((ptr), \
 				(unsigned long)(o), (unsigned long)(n)))
 
 /*
- * Since not supported natively, ARC cmpxchg() uses atomic_ops_lock (UP/SMP)
- * just to gaurantee semantics.
- * atomic_cmpxchg() needs to use the same locks as it's other atomic siblings
- * which also happens to be atomic_ops_lock.
- *
- * Thus despite semantically being different, implementation of atomic_cmpxchg()
- * is same as cmpxchg().
+ * atomic_cmpxchg is same as cmpxchg
+ *   LLSC: only different in data-type, semantics are exactly same
+ *  !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
+ *         semantics, and this lock also happens to be used by atomic_*()
  */
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
 
+#ifndef CONFIG_ARC_PLAT_EZNPS
+
 /*
  * xchg (reg with memory) based on "Native atomic" EX insn
  */
@@ -143,6 +168,41 @@
 
 #endif
 
+#else /* CONFIG_ARC_PLAT_EZNPS */
+
+static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
+				   int size)
+{
+	extern unsigned long __xchg_bad_pointer(void);
+
+	switch (size) {
+	case 4:
+		/*
+		 * Explicit full memory barrier needed before/after
+		 */
+		smp_mb();
+
+		__asm__ __volatile__(
+		"	mov r2, %0\n"
+		"	mov r3, %1\n"
+		"	.word %2\n"
+		"	mov %0, r2\n"
+		: "+r"(val)
+		: "r"(ptr), "i"(CTOP_INST_XEX_DI_R2_R2_R3)
+		: "r2", "r3", "memory");
+
+		smp_mb();
+
+		return val;
+	}
+	return __xchg_bad_pointer();
+}
+
+#define xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
+						 sizeof(*(ptr))))
+
+#endif /* CONFIG_ARC_PLAT_EZNPS */
+
 /*
  * "atomic" variant of xchg()
  * REQ: It needs to follow the same serialization rules as other atomic_xxx()
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 1d8f57c..e0e1faf 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -36,6 +36,10 @@
 #include <asm/irqflags-compact.h>
 #include <asm/thread_info.h>	/* For THREAD_SIZE */
 
+#ifdef CONFIG_ARC_PLAT_EZNPS
+#include <plat/ctop.h>
+#endif
+
 /*--------------------------------------------------------------
  * Switch to Kernel Mode stack if SP points to User Mode stack
  *
@@ -296,11 +300,13 @@
 	bic \reg, sp, (THREAD_SIZE - 1)
 .endm
 
+#ifndef CONFIG_ARC_PLAT_EZNPS
 /* Get CPU-ID of this core */
 .macro  GET_CPU_ID  reg
 	lr  \reg, [identity]
 	lsr \reg, \reg, 8
 	bmsk \reg, \reg, 7
 .endm
+#endif
 
 #endif  /* __ASM_ARC_ENTRY_COMPACT_H */
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 49014f0..c0fa0d2 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -13,21 +13,14 @@
 #define NR_IRQS		128 /* allow some CPU external IRQ handling */
 
 /* Platform Independent IRQs */
-#ifdef CONFIG_ISA_ARCOMPACT
-#define TIMER0_IRQ      3
-#define TIMER1_IRQ      4
-#else
-#define TIMER0_IRQ      16
-#define TIMER1_IRQ      17
+#ifdef CONFIG_ISA_ARCV2
+#define IPI_IRQ		19
+#define SOFTIRQ_IRQ	21
 #endif
 
 #include <linux/interrupt.h>
 #include <asm-generic/irq.h>
 
 extern void arc_init_IRQ(void);
-void arc_local_timer_setup(void);
-void arc_request_percpu_irq(int irq, int cpu,
-                            irqreturn_t (*isr)(int irq, void *dev),
-                            const char *irq_nm, void *percpu_dev);
 
 #endif
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 0d53854..296c342 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -31,7 +31,11 @@
  * These are used to make use of C type-checking..
  */
 typedef struct {
+#ifdef CONFIG_ARC_HAS_PAE40
+	unsigned long long pte;
+#else
 	unsigned long pte;
+#endif
 } pte_t;
 typedef struct {
 	unsigned long pgd;
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 10d4b8b..034bbdc 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -217,7 +217,7 @@
 #define BITS_FOR_PTE	(PGDIR_SHIFT - PAGE_SHIFT)
 #define BITS_FOR_PGD	(32 - PGDIR_SHIFT)
 
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)	/* vaddr span, not PDG sz */
+#define PGDIR_SIZE	_BITUL(PGDIR_SHIFT)	/* vaddr span, not PGD sz */
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
 #define	PTRS_PER_PTE	_BITUL(BITS_FOR_PTE)
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 1d694c1..f9048994 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -57,9 +57,19 @@
  * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise
  * get optimised away by gcc
  */
-#define cpu_relax()	__asm__ __volatile__ ("" : : : "memory")
+#ifndef CONFIG_EZNPS_MTM_EXT
 
-#define cpu_relax_lowlatency() cpu_relax()
+#define cpu_relax()		barrier()
+#define cpu_relax_lowlatency()	cpu_relax()
+
+#else
+
+#define cpu_relax()     \
+	__asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
+
+#define cpu_relax_lowlatency()	barrier()
+
+#endif
 
 #define copy_segments(tsk, mm)      do { } while (0)
 #define release_segments(mm)        do { } while (0)
@@ -97,7 +107,7 @@
 #endif /* !__ASSEMBLY__ */
 
 /*
- * System Memory Map on ARC
+ * Default System Memory Map on ARC
  *
  * ---------------------------- (lower 2G, Translated) -------------------------
  * 0x0000_0000		0x5FFF_FFFF	(user vaddr: TASK_SIZE)
@@ -109,20 +119,37 @@
  * 0xC000_0000		0xFFFF_FFFF	(peripheral uncached space)
  * -----------------------------------------------------------------------------
  */
-#define VMALLOC_START	0x70000000
 
-/*
- * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter
- * See asm/highmem.h for details
- */
-#define VMALLOC_SIZE	(PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4)
+#define TASK_SIZE	0x60000000
+
+#define VMALLOC_START	(PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
+
+/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
+#define VMALLOC_SIZE	((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
+
 #define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
 
-#define USER_KERNEL_GUTTER    0x10000000
+#define USER_KERNEL_GUTTER    (VMALLOC_START - TASK_SIZE)
 
-#define TASK_SIZE	(VMALLOC_START - USER_KERNEL_GUTTER)
-
+#ifdef CONFIG_ARC_PLAT_EZNPS
+/* The NPS architecture defines a special 129M window in user address space
+ * for special memory areas; when this window is accessed the MMU does not
+ * use the TLB. Instead, the MMU directs the access to:
+ * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM)
+ * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs)
+ *
+ * CMEM - the fastest memory available; its size is 16K.
+ * FMT  - used to map to either internal or external memory.
+ * Internal memory is the second-fastest memory; its size is 16M.
+ * External memory is the biggest memory (16G) and also the slowest.
+ *
+ * STACK_TOP needs to be PMD aligned (21 bits), which is why it is 0x57e00000.
+ */
+#define STACK_TOP       0x57e00000
+#else
 #define STACK_TOP       TASK_SIZE
+#endif
+
 #define STACK_TOP_MAX   STACK_TOP
 
 /* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 3078466..48b37c6 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -12,7 +12,11 @@
 #include <linux/types.h>
 #include <uapi/asm/setup.h>
 
+#ifdef CONFIG_ARC_PLAT_EZNPS
+#define COMMAND_LINE_SIZE 2048
+#else
 #define COMMAND_LINE_SIZE 256
+#endif
 
 /*
  * Data structure to map a ID to string
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index db8c59d..800e7c4 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -610,7 +610,9 @@
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	int ret = 0;
+	unsigned long flags;
 
+	local_irq_save(flags);
 	arch_spin_lock(&(rw->lock_mutex));
 
 	/*
@@ -623,6 +625,7 @@
 	}
 
 	arch_spin_unlock(&(rw->lock_mutex));
+	local_irq_restore(flags);
 
 	smp_mb();
 	return ret;
@@ -632,7 +635,9 @@
 static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	int ret = 0;
+	unsigned long flags;
 
+	local_irq_save(flags);
 	arch_spin_lock(&(rw->lock_mutex));
 
 	/*
@@ -646,6 +651,7 @@
 		ret = 1;
 	}
 	arch_spin_unlock(&(rw->lock_mutex));
+	local_irq_restore(flags);
 
 	return ret;
 }
@@ -664,16 +670,24 @@
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
 	arch_spin_lock(&(rw->lock_mutex));
 	rw->counter++;
 	arch_spin_unlock(&(rw->lock_mutex));
+	local_irq_restore(flags);
 }
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
 	arch_spin_lock(&(rw->lock_mutex));
 	rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
 	arch_spin_unlock(&(rw->lock_mutex));
+	local_irq_restore(flags);
 }
 
 #endif
diff --git a/arch/arc/include/uapi/asm/byteorder.h b/arch/arc/include/uapi/asm/byteorder.h
index 9da71d4..ea5ca444 100644
--- a/arch/arc/include/uapi/asm/byteorder.h
+++ b/arch/arc/include/uapi/asm/byteorder.h
@@ -9,7 +9,7 @@
 #ifndef __ASM_ARC_BYTEORDER_H
 #define __ASM_ARC_BYTEORDER_H
 
-#ifdef CONFIG_CPU_BIG_ENDIAN
+#ifdef __BIG_ENDIAN__
 #include <linux/byteorder/big_endian.h>
 #else
 #include <linux/byteorder/little_endian.h>
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 1bc2036..cfcdedf 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -9,7 +9,7 @@
 CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
 obj-y	:= arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o
-obj-y	+= signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o
+obj-y	+= signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
 obj-$(CONFIG_ISA_ARCOMPACT)		+= entry-compact.o intc-compact.o
 obj-$(CONFIG_ISA_ARCV2)			+= entry-arcv2.o intc-arcv2.o
 obj-$(CONFIG_PCI)  			+= pcibios.o
diff --git a/arch/arc/kernel/clk.c b/arch/arc/kernel/clk.c
deleted file mode 100644
index 10c7b0b..0000000
--- a/arch/arc/kernel/clk.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <asm/clk.h>
-
-unsigned long core_freq = 80000000;
-
-/*
- * As of now we default to device-tree provided clock
- * In future we can determine this in early boot
- */
-int arc_set_core_freq(unsigned long freq)
-{
-	core_freq = freq;
-	return 0;
-}
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index 5d446df..6f4cb0d 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -16,6 +16,9 @@
 
 #include <asm/asm-offsets.h>
 #include <linux/sched.h>
+#ifdef CONFIG_ARC_PLAT_EZNPS
+#include <plat/ctop.h>
+#endif
 
 #define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
 
@@ -67,9 +70,16 @@
 #ifndef CONFIG_SMP
 		"st  %2, [@_current_task]	\n\t"
 #else
+#ifdef CONFIG_ARC_PLAT_EZNPS
+		"lr   r24, [%4]		\n\t"
+#ifndef CONFIG_EZNPS_MTM_EXT
+		"lsr  r24, r24, 4		\n\t"
+#endif
+#else
 		"lr   r24, [identity]		\n\t"
 		"lsr  r24, r24, 8		\n\t"
 		"bmsk r24, r24, 7		\n\t"
+#endif
 		"add2 r24, @_current_task, r24	\n\t"
 		"st   %2,  [r24]		\n\t"
 #endif
@@ -107,6 +117,9 @@
 
 		: "=r"(tmp)
 		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
+#ifdef CONFIG_ARC_PLAT_EZNPS
+		, "i"(CTOP_AUX_LOGIC_GLOBAL_ID)
+#endif
 		: "blink"
 	);
 
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index 7e844fd..f1e07c2 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -14,7 +14,6 @@
 #include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
-#include <asm/clk.h>
 #include <asm/mach_desc.h>
 
 #ifdef CONFIG_SERIAL_EARLYCON
@@ -28,14 +27,12 @@
 
 static void __init arc_set_early_base_baud(unsigned long dt_root)
 {
-	unsigned int core_clk = arc_get_core_freq();
-
 	if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x"))
-		arc_base_baud = core_clk/3;
+		arc_base_baud = 166666666;	/* Fixed 166.6MHz clk (TB10x) */
 	else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
 		arc_base_baud = 33333333;	/* Fixed 33MHz clk (AXS10x) */
 	else
-		arc_base_baud = core_clk;
+		arc_base_baud = 50000000;	/* Fixed default 50MHz */
 }
 #else
 #define arc_set_early_base_baud(dt_root)
@@ -65,8 +62,6 @@
 {
 	const struct machine_desc *mdesc;
 	unsigned long dt_root;
-	const void *clk;
-	int len;
 
 	if (!early_init_dt_scan(dt))
 		return NULL;
@@ -76,10 +71,6 @@
 		machine_halt();
 
 	dt_root = of_get_flat_dt_root();
-	clk = of_get_flat_dt_prop(dt_root, "clock-frequency", &len);
-	if (clk)
-		arc_set_core_freq(of_read_ulong(clk, len/4));
-
 	arc_set_early_base_baud(dt_root);
 
 	return mdesc;
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 9425263..6c24faf 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -137,23 +137,30 @@
 	.map = arcv2_irq_map,
 };
 
-static struct irq_domain *root_domain;
 
 static int __init
 init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
 {
+	struct irq_domain *root_domain;
+
 	if (parent)
 		panic("DeviceTree incore intc not a root irq controller\n");
 
-	root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
-					    &arcv2_irq_ops, NULL);
-
+	root_domain = irq_domain_add_linear(intc, NR_CPU_IRQS, &arcv2_irq_ops, NULL);
 	if (!root_domain)
 		panic("root irq domain not avail\n");
 
-	/* with this we don't need to export root_domain */
+	/*
+	 * Needed for primary domain lookup to succeed
+	 * This is a primary irqchip, and can never have a parent
+	 */
 	irq_set_default_host(root_domain);
 
+#ifdef CONFIG_SMP
+	irq_create_mapping(root_domain, IPI_IRQ);
+#endif
+	irq_create_mapping(root_domain, SOFTIRQ_IRQ);
+
 	return 0;
 }
 
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 224d1c3..c5cceca 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -14,6 +14,8 @@
 #include <linux/irqchip.h>
 #include <asm/irq.h>
 
+#define TIMER0_IRQ	3	/* Fixed by ISA */
+
 /*
  * Early Hardware specific Interrupt setup
  * -Platform independent, needed for each CPU (not foldable into init_IRQ)
@@ -79,8 +81,9 @@
 static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
 			       irq_hw_number_t hw)
 {
-	switch (irq) {
+	switch (hw) {
 	case TIMER0_IRQ:
+		irq_set_percpu_devid(irq);
 		irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
 		break;
 	default:
@@ -94,21 +97,23 @@
 	.map = arc_intc_domain_map,
 };
 
-static struct irq_domain *root_domain;
-
 static int __init
 init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
 {
+	struct irq_domain *root_domain;
+
 	if (parent)
 		panic("DeviceTree incore intc not a root irq controller\n");
 
-	root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
+	root_domain = irq_domain_add_linear(intc, NR_CPU_IRQS,
 					    &arc_intc_domain_ops, NULL);
-
 	if (!root_domain)
 		panic("root irq domain not avail\n");
 
-	/* with this we don't need to export root_domain */
+	/*
+	 * Needed for primary domain lookup to succeed
+	 * This is a primary irqchip, and can never have a parent
+	 */
 	irq_set_default_host(root_domain);
 
 	return 0;
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index ba17f85..538b36a 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -41,53 +41,7 @@
  * "C" Entry point for any ARC ISR, called from low level vector handler
  * @irq is the vector number read from ICAUSE reg of on-chip intc
  */
-void arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
+void arch_do_IRQ(unsigned int hwirq, struct pt_regs *regs)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	irq_enter();
-	generic_handle_irq(irq);
-	irq_exit();
-	set_irq_regs(old_regs);
-}
-
-/*
- * API called for requesting percpu interrupts - called by each CPU
- *  - For boot CPU, actually request the IRQ with genirq core + enables
- *  - For subsequent callers only enable called locally
- *
- * Relies on being called by boot cpu first (i.e. request called ahead) of
- * any enable as expected by genirq. Hence Suitable only for TIMER, IPI
- * which are guaranteed to be setup on boot core first.
- * Late probed peripherals such as perf can't use this as there no guarantee
- * of being called on boot CPU first.
- */
-
-void arc_request_percpu_irq(int irq, int cpu,
-                            irqreturn_t (*isr)(int irq, void *dev),
-                            const char *irq_nm,
-                            void *percpu_dev)
-{
-	/* Boot cpu calls request, all call enable */
-	if (!cpu) {
-		int rc;
-
-#ifdef CONFIG_ISA_ARCOMPACT
-		/*
-		 * A subsequent request_percpu_irq() fails if percpu_devid is
-		 * not set. That in turns sets NOAUTOEN, meaning each core needs
-		 * to call enable_percpu_irq()
-		 *
-		 * For ARCv2, this is done in irq map function since we know
-		 * which irqs are strictly per cpu
-		 */
-		irq_set_percpu_devid(irq);
-#endif
-
-		rc = request_percpu_irq(irq, isr, irq_nm, percpu_dev);
-		if (rc)
-			panic("Percpu IRQ request failed for %d\n", irq);
-	}
-
-	enable_percpu_irq(irq, 0);
+	handle_domain_irq(NULL, hwirq, regs);
 }
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index c41c364..72f9179 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -15,9 +15,6 @@
 #include <asm/mcip.h>
 #include <asm/setup.h>
 
-#define IPI_IRQ		19
-#define SOFTIRQ_IRQ	21
-
 static char smp_cpuinfo_buf[128];
 static int idu_detected;
 
@@ -116,15 +113,13 @@
 		IS_AVAIL1(mp.dbg, "DEBUG "),
 		IS_AVAIL1(mp.gfrc, "GFRC"));
 
+	cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
 	idu_detected = mp.idu;
 
 	if (mp.dbg) {
 		__mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
 		__mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
 	}
-
-	if (IS_ENABLED(CONFIG_ARC_HAS_GFRC) && !mp.gfrc)
-		panic("kernel trying to use non-existent GFRC\n");
 }
 
 struct plat_smp_ops plat_smp_ops = {
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 151acf0..f63b8bf 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -13,7 +13,6 @@
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
-#include <linux/clk-provider.h>
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/cache.h>
@@ -24,7 +23,6 @@
 #include <asm/page.h>
 #include <asm/irq.h>
 #include <asm/unwind.h>
-#include <asm/clk.h>
 #include <asm/mach_desc.h>
 #include <asm/smp.h>
 
@@ -220,10 +218,6 @@
 	if (tbl->info.id == 0)
 		n += scnprintf(buf + n, len - n, "UNKNOWN ARC Processor\n");
 
-	n += scnprintf(buf + n, len - n, "CPU speed\t: %u.%02u Mhz\n",
-		       (unsigned int)(arc_get_core_freq() / 1000000),
-		       (unsigned int)(arc_get_core_freq() / 10000) % 100);
-
 	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
 		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
 		       IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
@@ -314,9 +308,6 @@
 	if (!cpu->extn.timer1)
 		panic("Timer1 is not present!\n");
 
-	if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->extn.rtc)
-		panic("RTC is not present\n");
-
 #ifdef CONFIG_ARC_HAS_DCCM
 	/*
 	 * DCCM can be arbit placed in hardware.
@@ -444,7 +435,6 @@
 
 static int __init customize_machine(void)
 {
-	of_clk_init(NULL);
 	/*
 	 * Traverses flattened DeviceTree - registering platform devices
 	 * (if any) complete with their resources
@@ -477,6 +467,8 @@
 {
 	char *str;
 	int cpu_id = ptr_to_cpu(v);
+	struct device_node *core_clk = of_find_node_by_name(NULL, "core_clk");
+	u32 freq = 0;
 
 	if (!cpu_online(cpu_id)) {
 		seq_printf(m, "processor [%d]\t: Offline\n", cpu_id);
@@ -489,6 +481,11 @@
 
 	seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
 
+	of_property_read_u32(core_clk, "clock-frequency", &freq);
+	if (freq)
+		seq_printf(m, "CPU speed\t: %u.%02u Mhz\n",
+			   freq / 1000000, (freq / 10000) % 100);
+
 	seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n",
 		   loops_per_jiffy / (500000 / HZ),
 		   (loops_per_jiffy / (5000 / HZ)) % 100);
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 4cb3add..f183cc6 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -126,11 +126,6 @@
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
-	notify_cpu_starting(cpu);
-	set_cpu_online(cpu, true);
-
-	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
-
 	/* Some SMP H/w setup - for each cpu */
 	if (plat_smp_ops.init_per_cpu)
 		plat_smp_ops.init_per_cpu(cpu);
@@ -138,7 +133,10 @@
 	if (machine_desc->init_per_cpu)
 		machine_desc->init_per_cpu(cpu);
 
-	arc_local_timer_setup();
+	notify_cpu_starting(cpu);
+	set_cpu_online(cpu, true);
+
+	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
 
 	local_irq_enable();
 	preempt_disable();
@@ -346,6 +344,10 @@
 
 /*
  * API called by platform code to hookup arch-common ISR to their IPI IRQ
+ *
+ * Note: If IPI is provided by platform (vs. say ARC MCIP), their intc setup/map
+ * function needs to call irq_set_percpu_devid() for IPI IRQ, otherwise
+ * request_percpu_irq() below will fail
  */
 static DEFINE_PER_CPU(int, ipi_dev);
 
@@ -353,7 +355,16 @@
 {
 	int *dev = per_cpu_ptr(&ipi_dev, cpu);
 
-	arc_request_percpu_irq(irq, cpu, do_IPI, "IPI Interrupt", dev);
+	/* Boot cpu calls request, all call enable */
+	if (!cpu) {
+		int rc;
+
+		rc = request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev);
+		if (rc)
+			panic("Percpu IRQ request failed for %d\n", irq);
+	}
+
+	enable_percpu_irq(irq, 0);
 
 	return 0;
 }
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 7d9a736..4549ab2 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -29,21 +29,16 @@
  * which however is currently broken
  */
 
-#include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/profile.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
 #include <asm/irq.h>
 #include <asm/arcregs.h>
-#include <asm/clk.h>
-#include <asm/mach_desc.h>
 
 #include <asm/mcip.h>
 
@@ -60,16 +55,35 @@
 
 #define ARC_TIMER_MAX	0xFFFFFFFF
 
+static unsigned long arc_timer_freq;
+
+static int noinline arc_get_timer_clk(struct device_node *node)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = of_clk_get(node, 0);	/* clock at index 0 of @node */
+	if (IS_ERR(clk)) {
+		pr_err("timer missing clk\n");
+		return PTR_ERR(clk);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		pr_err("Couldn't enable parent clk\n");
+		return ret;
+	}
+
+	arc_timer_freq = clk_get_rate(clk);	/* cached for timer setup */
+
+	return 0;
+}
+
 /********** Clock Source Device *********/
 
 #ifdef CONFIG_ARC_HAS_GFRC
 
-static int arc_counter_setup(void)
-{
-	return 1;
-}
-
-static cycle_t arc_counter_read(struct clocksource *cs)
+static cycle_t arc_read_gfrc(struct clocksource *cs)
 {
 	unsigned long flags;
 	union {
@@ -94,15 +108,31 @@
 	return stamp.full;
 }
 
-static struct clocksource arc_counter = {
+static struct clocksource arc_counter_gfrc = {
 	.name   = "ARConnect GFRC",
 	.rating = 400,
-	.read   = arc_counter_read,
+	.read   = arc_read_gfrc,
 	.mask   = CLOCKSOURCE_MASK(64),
 	.flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-#else
+static void __init arc_cs_setup_gfrc(struct device_node *node)
+{
+	int exists = cpuinfo_arc700[0].extn.gfrc;
+	int ret;
+
+	if (WARN(!exists, "Global-64-bit-Ctr clocksource not detected"))
+		return;
+
+	ret = arc_get_timer_clk(node);
+	if (ret)
+		return;
+
+	clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq);
+}
+CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc);
+
+#endif
 
 #ifdef CONFIG_ARC_HAS_RTC
 
@@ -110,15 +140,7 @@
 #define AUX_RTC_LOW	0x104
 #define AUX_RTC_HIGH	0x105
 
-int arc_counter_setup(void)
-{
-	write_aux_reg(AUX_RTC_CTRL, 1);
-
-	/* Not usable in SMP */
-	return !IS_ENABLED(CONFIG_SMP);
-}
-
-static cycle_t arc_counter_read(struct clocksource *cs)
+static cycle_t arc_read_rtc(struct clocksource *cs)
 {
 	unsigned long status;
 	union {
@@ -142,47 +164,78 @@
 	return stamp.full;
 }
 
-static struct clocksource arc_counter = {
+static struct clocksource arc_counter_rtc = {
 	.name   = "ARCv2 RTC",
 	.rating = 350,
-	.read   = arc_counter_read,
+	.read   = arc_read_rtc,
 	.mask   = CLOCKSOURCE_MASK(64),
 	.flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-#else /* !CONFIG_ARC_HAS_RTC */
+static void __init arc_cs_setup_rtc(struct device_node *node)
+{
+	int exists = cpuinfo_arc700[smp_processor_id()].extn.rtc;
+	int ret;
+
+	if (WARN(!exists, "Local-64-bit-Ctr clocksource not detected"))
+		return;
+
+	/* Local to CPU hence not usable in SMP */
+	if (WARN(IS_ENABLED(CONFIG_SMP), "Local-64-bit-Ctr not usable in SMP"))
+		return;
+
+	ret = arc_get_timer_clk(node);
+	if (ret)
+		return;
+
+	write_aux_reg(AUX_RTC_CTRL, 1);
+
+	clocksource_register_hz(&arc_counter_rtc, arc_timer_freq);
+}
+CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc);
+
+#endif
 
 /*
- * set 32bit TIMER1 to keep counting monotonically and wraparound
+ * 32bit TIMER1 to keep counting monotonically and wraparound
  */
-int arc_counter_setup(void)
-{
-	write_aux_reg(ARC_REG_TIMER1_LIMIT, ARC_TIMER_MAX);
-	write_aux_reg(ARC_REG_TIMER1_CNT, 0);
-	write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH);
 
-	/* Not usable in SMP */
-	return !IS_ENABLED(CONFIG_SMP);
-}
-
-static cycle_t arc_counter_read(struct clocksource *cs)
+static cycle_t arc_read_timer1(struct clocksource *cs)
 {
 	return (cycle_t) read_aux_reg(ARC_REG_TIMER1_CNT);
 }
 
-static struct clocksource arc_counter = {
+static struct clocksource arc_counter_timer1 = {
 	.name   = "ARC Timer1",
 	.rating = 300,
-	.read   = arc_counter_read,
+	.read   = arc_read_timer1,
 	.mask   = CLOCKSOURCE_MASK(32),
 	.flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-#endif
-#endif
+static void __init arc_cs_setup_timer1(struct device_node *node)
+{
+	int ret;
+
+	/* Local to CPU hence not usable in SMP */
+	if (IS_ENABLED(CONFIG_SMP))
+		return;
+
+	ret = arc_get_timer_clk(node);
+	if (ret)
+		return;
+
+	write_aux_reg(ARC_REG_TIMER1_LIMIT, ARC_TIMER_MAX);
+	write_aux_reg(ARC_REG_TIMER1_CNT, 0);
+	write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH);
+
+	clocksource_register_hz(&arc_counter_timer1, arc_timer_freq);
+}
 
 /********** Clock Event Device *********/
 
+static int arc_timer_irq;
+
 /*
  * Arm the timer to interrupt after @cycles
  * The distinction for oneshot/periodic is done in arc_event_timer_ack() below
@@ -209,7 +262,7 @@
 	 * At X Hz, 1 sec = 1000ms -> X cycles;
 	 *		      10ms -> X / 100 cycles
 	 */
-	arc_timer_event_setup(arc_get_core_freq() / HZ);
+	arc_timer_event_setup(arc_timer_freq / HZ);
 	return 0;
 }
 
@@ -218,7 +271,6 @@
 	.features		= CLOCK_EVT_FEAT_ONESHOT |
 				  CLOCK_EVT_FEAT_PERIODIC,
 	.rating			= 300,
-	.irq			= TIMER0_IRQ,	/* hardwired, no need for resources */
 	.set_next_event		= arc_clkevent_set_next_event,
 	.set_state_periodic	= arc_clkevent_set_periodic,
 };
@@ -244,45 +296,81 @@
 	return IRQ_HANDLED;
 }
 
-/*
- * Setup the local event timer for @cpu
- */
-void arc_local_timer_setup()
+static int arc_timer_cpu_notify(struct notifier_block *self,
+				unsigned long action, void *hcpu)
 {
 	struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device);
-	int cpu = smp_processor_id();
 
-	evt->cpumask = cpumask_of(cpu);
-	clockevents_config_and_register(evt, arc_get_core_freq(),
+	evt->cpumask = cpumask_of(smp_processor_id());
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		clockevents_config_and_register(evt, arc_timer_freq,
+						0, ULONG_MAX);
+		enable_percpu_irq(arc_timer_irq, 0);
+		break;
+	case CPU_DYING:
+		disable_percpu_irq(arc_timer_irq);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block arc_timer_cpu_nb = {
+	.notifier_call = arc_timer_cpu_notify,
+};
+
+/*
+ * clockevent setup for boot CPU
+ */
+static void __init arc_clockevent_setup(struct device_node *node)
+{
+	struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device);
+	int ret;
+
+	register_cpu_notifier(&arc_timer_cpu_nb);
+
+	arc_timer_irq = irq_of_parse_and_map(node, 0);
+	if (arc_timer_irq <= 0)
+		panic("clockevent: missing irq");
+
+	ret = arc_get_timer_clk(node);
+	if (ret)
+		panic("clockevent: missing clk");
+
+	evt->irq = arc_timer_irq;
+	evt->cpumask = cpumask_of(smp_processor_id());
+	clockevents_config_and_register(evt, arc_timer_freq,
 					0, ARC_TIMER_MAX);
 
-	/* setup the per-cpu timer IRQ handler - for all cpus */
-	arc_request_percpu_irq(TIMER0_IRQ, cpu, timer_irq_handler,
-			       "Timer0 (per-cpu-tick)", evt);
+	/* Needs apriori irq_set_percpu_devid() done in intc map function */
+	ret = request_percpu_irq(arc_timer_irq, timer_irq_handler,
+				 "Timer0 (per-cpu-tick)", evt);
+	if (ret)
+		panic("clockevent: unable to request irq\n");
+
+	enable_percpu_irq(arc_timer_irq, 0);
 }
 
+static void __init arc_of_timer_init(struct device_node *np)
+{
+	static int init_count = 0;
+
+	if (!init_count) {
+		init_count = 1;
+		arc_clockevent_setup(np);
+	} else {
+		arc_cs_setup_timer1(np);
+	}
+}
+CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init);
+
 /*
  * Called from start_kernel() - boot CPU only
- *
- * -Sets up h/w timers as applicable on boot cpu
- * -Also sets up any global state needed for timer subsystem:
- *    - for "counting" timer, registers a clocksource, usable across CPUs
- *      (provided that underlying counter h/w is synchronized across cores)
- *    - for "event" timer, sets up TIMER0 IRQ (as that is platform agnostic)
  */
 void __init time_init(void)
 {
-	/*
-	 * sets up the timekeeping free-flowing counter which also returns
-	 * whether the counter is usable as clocksource
-	 */
-	if (arc_counter_setup())
-		/*
-		 * CLK upto 4.29 GHz can be safely represented in 32 bits
-		 * because Max 32 bit number is 4,294,967,295
-		 */
-		clocksource_register_hz(&arc_counter, arc_get_core_freq());
-
-	/* sets up the periodic event timer */
-	arc_local_timer_setup();
+	of_clk_init(NULL);
+	clocksource_probe();
 }
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 7046c12..ec868a9 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -814,6 +814,17 @@
 
 	printk(arc_mmu_mumbojumbo(0, str, sizeof(str)));
 
+	/*
+	 * Can't be done in processor.h due to header include dependencies
+	 */
+	BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE));
+
+	/*
+	 * stack top size sanity check,
+	 * Can't be done in processor.h due to header include dependencies
+	 */
+	BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));
+
 	/* For efficiency sake, kernel is compile time built for a MMU ver
 	 * This must match the hardware it is running on.
 	 * Linux built for MMU V2, if run on MMU V1 will break down because V1
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index 1b0f0f4..8654870 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -14,10 +14,11 @@
  *
  */
 
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
+#include <linux/libfdt.h>
 
 #include <asm/asm-offsets.h>
-#include <asm/clk.h>
 #include <asm/io.h>
 #include <asm/mach_desc.h>
 #include <asm/mcip.h>
@@ -389,6 +390,13 @@
 
 static void __init axs103_early_init(void)
 {
+	int offset = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
+	const struct fdt_property *prop = fdt_get_property(initial_boot_params,
+							   offset,
+							   "clock-frequency",
+							   NULL);
+	u32 freq = be32_to_cpu(*(u32*)(prop->data)) / 1000000, orig = freq;
+
 	/*
 	 * AXS103 configurations for SMP/QUAD configurations share device tree
 	 * which defaults to 90 MHz. However recent failures of Quad config
@@ -401,12 +409,10 @@
 #ifdef CONFIG_ARC_MCIP
 	unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
 	if (num_cores > 2)
-		arc_set_core_freq(50 * 1000000);
-	else if (num_cores == 2)
-		arc_set_core_freq(75 * 1000000);
+		freq = 50;
 #endif
 
-	switch (arc_get_core_freq()/1000000) {
+	switch (freq) {
 	case 33:
 		axs103_set_freq(1, 1, 1);
 		break;
@@ -431,11 +437,18 @@
 		 * DT "clock-frequency" might not match with board value.
 		 * Hence update it to match the board value.
 		 */
-		arc_set_core_freq(axs103_get_freq() * 1000000);
+		freq = axs103_get_freq();
 		break;
 	}
 
-	pr_info("Freq is %dMHz\n", axs103_get_freq());
+	pr_info("Freq is %dMHz\n", freq);
+
+	/* Patching .dtb in-place with new core clock value */
+	if (freq != orig ) {
+		freq = cpu_to_be32(freq * 1000000);
+		fdt_setprop_inplace(initial_boot_params, offset,
+				    "clock-frequency", &freq, sizeof(freq));
+	}
 
 	/* Memory maps already config in pre-bootloader */
 
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
new file mode 100644
index 0000000..1d175cc
--- /dev/null
+++ b/arch/arc/plat-eznps/Kconfig
@@ -0,0 +1,35 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+menuconfig ARC_PLAT_EZNPS
+	bool "\"EZchip\" ARC dev platform"
+	select ARC_HAS_COH_CACHES if SMP
+	select CPU_BIG_ENDIAN
+	select CLKSRC_NPS
+	select EZNPS_GIC
+	select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET
+	help
+	  Support for EZchip development platforms,
+	  based on ARC700 cores.
+	  We handle a few flavours:
+	    - Hardware Emulator AKA HE which is an FPGA based chassis
+	    - Simulator based on MetaWare nSIM
+	    - NPS400 chip based on ASIC
+
+config EZNPS_MTM_EXT
+	bool "ARC-EZchip MTM Extensions"
+	select CPUMASK_OFFSTACK
+	depends on ARC_PLAT_EZNPS && SMP
+	default y
+	help
+	  Here we add a new hierarchy level to the CPU topology.
+	  We get:
+		Core
+		Thread
+	  At the new thread level each CPU represents one HW thread.
+	  At the highest level each core contains 16 threads,
+	  each of which looks like a CPU from Linux's point of view.
+	  All threads within the same core share the execution unit of the
+	  core, and the HW scheduler round-robins between them.
diff --git a/arch/arc/plat-eznps/Makefile b/arch/arc/plat-eznps/Makefile
new file mode 100644
index 0000000..21091b1
--- /dev/null
+++ b/arch/arc/plat-eznps/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-y := entry.o platform.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_EZNPS_MTM_EXT) += mtm.o
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
new file mode 100644
index 0000000..328261c
--- /dev/null
+++ b/arch/arc/plat-eznps/entry.S
@@ -0,0 +1,70 @@
+/*******************************************************************************
+
+  EZNPS CPU startup Code
+  Copyright(c) 2012 EZchip Technologies.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+  more details.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+*******************************************************************************/
+#include <linux/linkage.h>
+#include <asm/entry.h>
+#include <asm/cache.h>
+#include <plat/ctop.h>
+
+	.cpu A7
+
+	.section .init.text, "ax",@progbits
+	.align 1024	; HW requirement for restart first PC
+
+ENTRY(res_service)
+#ifdef CONFIG_EZNPS_MTM_EXT
+	; There is no work for HW thread id != 0
+	lr	r3, [CTOP_AUX_THREAD_ID]
+	cmp	r3, 0
+	jne	stext
+#endif
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+	; With no cache coherency mechanism D$ need to be used very carefully.
+	; Address space:
+	; 0G-2G: We disable CONFIG_ARC_CACHE_PAGES.
+	; 2G-3G: We disable D$ by setting this bit.
+	; 3G-4G: D$ is disabled by architecture.
+	; FMT are huge pages for user application reside at 0-2G.
+	; Only FMT left as one who can use D$ where each such page got
+	; disable/enable bit for cachability.
+	; Programmer will use FMT pages for private data so cache coherency
+	; would not be a problem.
+	; First thing we invalidate D$
+	sr	1, [ARC_REG_DC_IVDC]
+	sr	HW_COMPLY_KRN_NOT_D_CACHED, [CTOP_AUX_HW_COMPLY]
+#endif
+
+#ifdef CONFIG_SMP
+	; We set logical cpuid to be used by GET_CPU_ID
+	; We do not use physical cpuid since we want ids to be contiguous when
+	; it comes to cpus on the same quad cluster.
+	; This is useful for applications that use shared resources of a quad
+	; cluster such as SRAMs.
+	lr 	r3, [CTOP_AUX_CORE_ID]
+	sr	r3, [CTOP_AUX_LOGIC_CORE_ID]
+	lr	r3, [CTOP_AUX_CLUSTER_ID]
+	; Set logical is achieved by swap of 2 middle bits of cluster id (4 bit)
+	; r3 is used since we use short instruction and we need q-class reg
+	.short	CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST
+	.word 	CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM
+	 sr	r3, [CTOP_AUX_LOGIC_CLUSTER_ID]
+#endif
+
+	j	stext
+END(res_service)
diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h
new file mode 100644
index 0000000..9d6718c
--- /dev/null
+++ b/arch/arc/plat-eznps/include/plat/ctop.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#ifndef _PLAT_EZNPS_CTOP_H
+#define _PLAT_EZNPS_CTOP_H
+
+#ifndef CONFIG_ARC_PLAT_EZNPS
+#error "Incorrect ctop.h include"
+#endif
+
+#include <soc/nps/common.h>
+
+/* core auxiliary registers */
+#ifdef __ASSEMBLY__
+#define CTOP_AUX_BASE				(-0x800)
+#else
+#define CTOP_AUX_BASE				0xFFFFF800
+#endif
+
+#define CTOP_AUX_GLOBAL_ID			(CTOP_AUX_BASE + 0x000)
+#define CTOP_AUX_CLUSTER_ID			(CTOP_AUX_BASE + 0x004)
+#define CTOP_AUX_CORE_ID			(CTOP_AUX_BASE + 0x008)
+#define CTOP_AUX_THREAD_ID			(CTOP_AUX_BASE + 0x00C)
+#define CTOP_AUX_LOGIC_GLOBAL_ID		(CTOP_AUX_BASE + 0x010)
+#define CTOP_AUX_LOGIC_CLUSTER_ID		(CTOP_AUX_BASE + 0x014)
+#define CTOP_AUX_LOGIC_CORE_ID			(CTOP_AUX_BASE + 0x018)
+#define CTOP_AUX_MT_CTRL			(CTOP_AUX_BASE + 0x020)
+#define CTOP_AUX_HW_COMPLY			(CTOP_AUX_BASE + 0x024)
+#define CTOP_AUX_LPC				(CTOP_AUX_BASE + 0x030)
+#define CTOP_AUX_EFLAGS				(CTOP_AUX_BASE + 0x080)
+#define CTOP_AUX_IACK				(CTOP_AUX_BASE + 0x088)
+#define CTOP_AUX_GPA1				(CTOP_AUX_BASE + 0x08C)
+#define CTOP_AUX_UDMC				(CTOP_AUX_BASE + 0x300)
+
+/* EZchip core instructions */
+#define CTOP_INST_HWSCHD_OFF_R3			0x3B6F00BF
+#define CTOP_INST_HWSCHD_OFF_R4			0x3C6F00BF
+#define CTOP_INST_HWSCHD_RESTORE_R3		0x3E6F70C3
+#define CTOP_INST_HWSCHD_RESTORE_R4		0x3E6F7103
+#define CTOP_INST_SCHD_RW			0x3E6F7004
+#define CTOP_INST_SCHD_RD			0x3E6F7084
+#define CTOP_INST_ASRI_0_R3			0x3B56003E
+#define CTOP_INST_XEX_DI_R2_R2_R3		0x4A664C00
+#define CTOP_INST_EXC_DI_R2_R2_R3		0x4A664C01
+#define CTOP_INST_AADD_DI_R2_R2_R3		0x4A664C02
+#define CTOP_INST_AAND_DI_R2_R2_R3		0x4A664C04
+#define CTOP_INST_AOR_DI_R2_R2_R3		0x4A664C05
+#define CTOP_INST_AXOR_DI_R2_R2_R3		0x4A664C06
+
+/* Do not use D$ for address in 2G-3G */
+#define HW_COMPLY_KRN_NOT_D_CACHED		_BITUL(28)
+
+#define NPS_MSU_EN_CFG				0x80
+#define NPS_CRG_BLKID				0x480
+#define NPS_CRG_SYNC_BIT			_BITUL(0)
+#define NPS_GIM_BLKID				0x5C0
+
+/* GIM registers and fields*/
+#define NPS_GIM_UART_LINE			_BITUL(7)
+#define NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE	_BITUL(10)
+#define NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE	_BITUL(11)
+#define NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE	_BITUL(25)
+#define NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE	_BITUL(26)
+
+#ifndef __ASSEMBLY__
+/* Functional registers definition */
+struct nps_host_reg_mtm_cfg {
+	union {
+		struct {
+			u32 gen:1, gdis:1, clk_gate_dis:1, asb:1,
+			__reserved:9, nat:3, ten:16;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_mtm_cpu_cfg {
+	union {
+		struct {
+			u32 csa:22, dmsid:6, __reserved:3, cs:1;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_thr_init {
+	union {
+		struct {
+			u32 str:1, __reserved:27, thr_id:4;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_thr_init_sts {
+	union {
+		struct {
+			u32 bsy:1, err:1, __reserved:26, thr_id:4;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_msu_en_cfg {
+	union {
+		struct {
+			u32     __reserved1:11,
+			rtc_en:1, ipc_en:1, gim_1_en:1,
+			gim_0_en:1, ipi_en:1, buff_e_rls_bmuw:1,
+			buff_e_alc_bmuw:1, buff_i_rls_bmuw:1, buff_i_alc_bmuw:1,
+			buff_e_rls_bmue:1, buff_e_alc_bmue:1, buff_i_rls_bmue:1,
+			buff_i_alc_bmue:1, __reserved2:1, buff_e_pre_en:1,
+			buff_i_pre_en:1, pmuw_ja_en:1, pmue_ja_en:1,
+			pmuw_nj_en:1, pmue_nj_en:1, msu_en:1;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_gim_p_int_dst {
+	union {
+		struct {
+			u32 int_out_en:1, __reserved1:4,
+			is:1, intm:2, __reserved2:4,
+			nid:4, __reserved3:4, cid:4,
+			 __reserved4:4, tid:4;
+		};
+		u32 value;
+	};
+};
+
+/* AUX registers definition */
+struct nps_host_reg_aux_udmc {
+	union {
+		struct {
+			u32 dcp:1, cme:1, __reserved:19, nat:3,
+			__reserved2:5, dcas:3;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_aux_mt_ctrl {
+	union {
+		struct {
+			u32 mten:1, hsen:1, scd:1, sten:1,
+			st_cnt:8, __reserved:8,
+			hs_cnt:8, __reserved1:4;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_aux_hw_comply {
+	union {
+		struct {
+			u32 me:1, le:1, te:1, knc:1, __reserved:28;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_aux_lpc {
+	union {
+		struct {
+			u32 mep:1, __reserved:31;
+		};
+		u32 value;
+	};
+};
+
+/* CRG registers */
+#define REG_GEN_PURP_0          nps_host_reg_non_cl(NPS_CRG_BLKID, 0x1BF)
+
+/* GIM registers */
+#define REG_GIM_P_INT_EN_0      nps_host_reg_non_cl(NPS_GIM_BLKID, 0x100)
+#define REG_GIM_P_INT_POL_0     nps_host_reg_non_cl(NPS_GIM_BLKID, 0x110)
+#define REG_GIM_P_INT_SENS_0    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x114)
+#define REG_GIM_P_INT_BLK_0     nps_host_reg_non_cl(NPS_GIM_BLKID, 0x118)
+#define REG_GIM_P_INT_DST_10    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13A)
+#define REG_GIM_P_INT_DST_11    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13B)
+#define REG_GIM_P_INT_DST_25    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x149)
+#define REG_GIM_P_INT_DST_26    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x14A)
+
+#else
+
+.macro  GET_CPU_ID  reg
+	lr  \reg, [CTOP_AUX_LOGIC_GLOBAL_ID]
+#ifndef CONFIG_EZNPS_MTM_EXT
+	lsr \reg, \reg, 4
+#endif
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _PLAT_EZNPS_CTOP_H */
diff --git a/arch/arc/plat-eznps/include/plat/mtm.h b/arch/arc/plat-eznps/include/plat/mtm.h
new file mode 100644
index 0000000..29b91b5
--- /dev/null
+++ b/arch/arc/plat-eznps/include/plat/mtm.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#ifndef _PLAT_EZNPS_MTM_H
+#define _PLAT_EZNPS_MTM_H
+
+#include <plat/ctop.h>
+
+static inline void *nps_mtm_reg_addr(u32 cpu, u32 reg)
+{
+	struct global_id gid;
+	u32 core, blkid;
+
+	gid.value = cpu;
+	core = gid.core;
+	blkid = (((core & 0x0C) << 2) | (core & 0x03));
+
+	return nps_host_reg(cpu, blkid, reg);
+}
+
+#ifdef CONFIG_EZNPS_MTM_EXT
+#define NPS_CPU_TO_THREAD_NUM(cpu) \
+	({ struct global_id __gid; __gid.value = (cpu); __gid.thread; })
+
+/* MTM registers */
+#define MTM_CFG(cpu)			nps_mtm_reg_addr(cpu, 0x81)
+#define MTM_THR_INIT(cpu)		nps_mtm_reg_addr(cpu, 0x92)
+#define MTM_THR_INIT_STS(cpu)		nps_mtm_reg_addr(cpu, 0x93)
+
+#define get_thread(map) ((map).thread)
+#define eznps_max_cpus 4096
+#define eznps_cpus_per_cluster	256
+
+void mtm_enable_core(unsigned int cpu);
+int mtm_enable_thread(int cpu);
+#else /* !CONFIG_EZNPS_MTM_EXT */
+
+#define get_thread(map) 0
+#define eznps_max_cpus 256
+#define eznps_cpus_per_cluster	16
+#define mtm_enable_core(cpu)
+#define mtm_enable_thread(cpu) 1
+#define NPS_CPU_TO_THREAD_NUM(cpu) 0
+
+#endif /* CONFIG_EZNPS_MTM_EXT */
+
+#endif /* _PLAT_EZNPS_MTM_H */
diff --git a/arch/arc/plat-eznps/include/plat/smp.h b/arch/arc/plat-eznps/include/plat/smp.h
new file mode 100644
index 0000000..06b59bd
--- /dev/null
+++ b/arch/arc/plat-eznps/include/plat/smp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#ifndef __PLAT_EZNPS_SMP_H
+#define __PLAT_EZNPS_SMP_H
+
+#ifdef CONFIG_SMP
+
+extern void res_service(void);
+
+#endif /* CONFIG_SMP */
+
+#endif
diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c
new file mode 100644
index 0000000..aaaaffd
--- /dev/null
+++ b/arch/arc/plat-eznps/mtm.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#include <linux/smp.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <asm/arcregs.h>
+#include <plat/mtm.h>
+#include <plat/smp.h>
+
+#define MT_CTRL_HS_CNT		0xFF
+#define MT_CTRL_ST_CNT		0xF
+#define NPS_NUM_HW_THREADS	0x10
+
+static void mtm_init_nat(int cpu)
+{
+	struct nps_host_reg_mtm_cfg mtm_cfg;
+	struct nps_host_reg_aux_udmc udmc;
+	int log_nat, nat = 0, i, t;
+
+	/* Iterate core threads and update nat */
+	for (i = 0, t = cpu; i < NPS_NUM_HW_THREADS; i++, t++)
+		nat += test_bit(t, cpumask_bits(cpu_possible_mask));
+
+	log_nat = ilog2(nat);
+
+	udmc.value = read_aux_reg(CTOP_AUX_UDMC);
+	udmc.nat = log_nat;
+	write_aux_reg(CTOP_AUX_UDMC, udmc.value);
+
+	mtm_cfg.value = ioread32be(MTM_CFG(cpu));
+	mtm_cfg.nat = log_nat;
+	iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
+}
+
+static void mtm_init_thread(int cpu)
+{
+	int i, tries = 5;
+	struct nps_host_reg_thr_init thr_init;
+	struct nps_host_reg_thr_init_sts thr_init_sts;
+
+	/* Set thread init register */
+	thr_init.value = 0;
+	iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
+	thr_init.thr_id = NPS_CPU_TO_THREAD_NUM(cpu);
+	thr_init.str = 1;
+	iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
+
+	/* Poll till thread init is done */
+	for (i = 0; i < tries; i++) {
+		thr_init_sts.value = ioread32be(MTM_THR_INIT_STS(cpu));
+		if (thr_init_sts.thr_id == thr_init.thr_id) {
+			if (thr_init_sts.bsy)
+				continue;
+			else if (thr_init_sts.err)
+				pr_warn("Failed to thread init cpu %u\n", cpu);
+			break;
+		}
+
+		pr_warn("Wrong thread id in thread init for cpu %u\n", cpu);
+		break;
+	}
+
+	if (i == tries)
+		pr_warn("Got thread init timeout for cpu %u\n", cpu);
+}
+
+int mtm_enable_thread(int cpu)
+{
+	struct nps_host_reg_mtm_cfg mtm_cfg;
+
+	if (NPS_CPU_TO_THREAD_NUM(cpu) == 0)
+		return 1;
+
+	/* Enable thread in mtm */
+	mtm_cfg.value = ioread32be(MTM_CFG(cpu));
+	mtm_cfg.ten |= (1 << (NPS_CPU_TO_THREAD_NUM(cpu)));
+	iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
+
+	return 0;
+}
+
+void mtm_enable_core(unsigned int cpu)
+{
+	int i;
+	struct nps_host_reg_aux_mt_ctrl mt_ctrl;
+	struct nps_host_reg_mtm_cfg mtm_cfg;
+
+	if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
+		return;
+
+	/* Initialize Number of Active Threads */
+	mtm_init_nat(cpu);
+
+	/* Initialize mtm_cfg */
+	mtm_cfg.value = ioread32be(MTM_CFG(cpu));
+	mtm_cfg.ten = 1;
+	iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
+
+	/* Initialize all other threads in core */
+	for (i = 1; i < NPS_NUM_HW_THREADS; i++)
+		mtm_init_thread(cpu + i);
+
+
+	/* Enable HW schedule, stall counter, mtm */
+	mt_ctrl.value = 0;
+	mt_ctrl.hsen = 1;
+	mt_ctrl.hs_cnt = MT_CTRL_HS_CNT;
+	mt_ctrl.sten = 1;
+	mt_ctrl.st_cnt = MT_CTRL_ST_CNT;
+	mt_ctrl.mten = 1;
+	write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value);
+
+	/*
+	 * HW scheduling mechanism will start working
+	 * Only after call to instruction "schd.rw".
+	 * cpu_relax() calls "schd.rw" instruction.
+	 */
+	cpu_relax();
+}
diff --git a/arch/arc/plat-eznps/platform.c b/arch/arc/plat-eznps/platform.c
new file mode 100644
index 0000000..7ad6d2b
--- /dev/null
+++ b/arch/arc/plat-eznps/platform.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/mach_desc.h>
+#include <plat/mtm.h>
+
+static void __init eznps_configure_msu(void)
+{
+	int cpu;
+	struct nps_host_reg_msu_en_cfg msu_en_cfg = {.value = 0};
+
+	msu_en_cfg.msu_en = 1;
+	msu_en_cfg.ipi_en = 1;
+	msu_en_cfg.gim_0_en = 1;
+	msu_en_cfg.gim_1_en = 1;
+
+	/* enable IPI and GIM messages on all clusters */
+	for (cpu = 0 ; cpu < eznps_max_cpus; cpu += eznps_cpus_per_cluster)
+		iowrite32be(msu_en_cfg.value,
+			    nps_host_reg(cpu, NPS_MSU_BLKID, NPS_MSU_EN_CFG));
+}
+
+static void __init eznps_configure_gim(void)
+{
+	u32 reg_value;
+	u32 gim_int_lines;
+	struct nps_host_reg_gim_p_int_dst gim_p_int_dst = {.value = 0};
+
+	gim_int_lines = NPS_GIM_UART_LINE;
+	gim_int_lines |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
+	gim_int_lines |= NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE;
+	gim_int_lines |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
+	gim_int_lines |= NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE;
+
+	/*
+	 * IRQ polarity
+	 * low or high level
+	 * negative or positive edge
+	 */
+	reg_value = ioread32be(REG_GIM_P_INT_POL_0);
+	reg_value &= ~gim_int_lines;
+	iowrite32be(reg_value, REG_GIM_P_INT_POL_0);
+
+	/* IRQ type level or edge */
+	reg_value = ioread32be(REG_GIM_P_INT_SENS_0);
+	reg_value |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
+	reg_value |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
+	iowrite32be(reg_value, REG_GIM_P_INT_SENS_0);
+
+	/*
+	 * GIM interrupt select type for
+	 * dbg_lan TX and RX interrupts
+	 * should be type 1
+	 * type 0 = IRQ line 6
+	 * type 1 = IRQ line 7
+	 */
+	gim_p_int_dst.is = 1;
+	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_10);
+	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_11);
+	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_25);
+	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_26);
+
+	/*
+	 * CTOP IRQ lines should be defined
+	 * as blocking in GIM
+	 */
+	iowrite32be(gim_int_lines, REG_GIM_P_INT_BLK_0);
+
+	/* enable CTOP IRQ lines in GIM */
+	iowrite32be(gim_int_lines, REG_GIM_P_INT_EN_0);
+}
+
+static void __init eznps_early_init(void)
+{
+	eznps_configure_msu();
+	eznps_configure_gim();
+}
+
+static const char *eznps_compat[] __initconst = {
+	"ezchip,arc-nps",
+	NULL,
+};
+
+MACHINE_START(NPS, "nps")
+	.dt_compat	= eznps_compat,
+	.init_early	= eznps_early_init,
+MACHINE_END
diff --git a/arch/arc/plat-eznps/smp.c b/arch/arc/plat-eznps/smp.c
new file mode 100644
index 0000000..5e901f8
--- /dev/null
+++ b/arch/arc/plat-eznps/smp.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#include <linux/smp.h>
+#include <linux/of_fdt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+#include <asm/irq.h>
+#include <plat/ctop.h>
+#include <plat/smp.h>
+#include <plat/mtm.h>
+
+#define NPS_DEFAULT_MSID	0x34
+#define NPS_MTM_CPU_CFG		0x90
+
+static char smp_cpuinfo_buf[128] = {"Extn [EZNPS-SMP]\t: On\n"};
+
+/* Read cpulist property @name from DT root into @cpumask; 0 on success */
+static int __init eznps_get_map(const char *name, struct cpumask *cpumask)
+{
+	const char *buf;
+
+	buf = of_get_flat_dt_prop(of_get_flat_dt_root(), name, NULL);
+	if (!buf)
+		return 1;
+
+	/* A malformed list must fail too, else callers install a bogus mask */
+	if (cpulist_parse(buf, cpumask))
+		return 1;
+	return 0;
+}
+
+/* Set the present and possible cpu masks from the DT root cpu lists */
+static void __init eznps_init_cpumasks(void)
+{
+	struct cpumask cpumask;
+
+	if (eznps_get_map("present-cpus", &cpumask)) {
+		pr_err("Failed to get present-cpus from dtb\n");
+		return;
+	}
+	init_cpu_present(&cpumask);
+
+	if (eznps_get_map("possible-cpus", &cpumask)) {
+		pr_err("Failed to get possible-cpus from dtb\n");
+		return;
+	}
+	init_cpu_possible(&cpumask);
+}
+
+static void eznps_init_core(unsigned int cpu)
+{
+	u32 sync_value;
+	struct nps_host_reg_aux_hw_comply hw_comply;
+	struct nps_host_reg_aux_lpc lpc = {.value = 0};
+
+	if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
+		return;
+
+	hw_comply.value = read_aux_reg(CTOP_AUX_HW_COMPLY);
+	hw_comply.me  = 1;
+	hw_comply.le  = 1;
+	hw_comply.te  = 1;
+	write_aux_reg(CTOP_AUX_HW_COMPLY, hw_comply.value);
+
+	/* Enable MMU clock (lpc zeroed above: reserved bits written as 0) */
+	lpc.mep = 1;
+	write_aux_reg(CTOP_AUX_LPC, lpc.value);
+
+	/* Boot CPU only */
+	if (!cpu) {
+		/* Write to general purpose register in CRG */
+		sync_value = ioread32be(REG_GEN_PURP_0);
+		sync_value |= NPS_CRG_SYNC_BIT;
+		iowrite32be(sync_value, REG_GEN_PURP_0);
+	}
+}
+
+/*
+ * Master kick starting another CPU
+ */
+static void __init eznps_smp_wakeup_cpu(int cpu, unsigned long pc)
+{
+	struct nps_host_reg_mtm_cpu_cfg cpu_cfg;
+
+	if (mtm_enable_thread(cpu) == 0)
+		return;
+
+	/* set PC, dmsid, and start CPU */
+	cpu_cfg.value = (u32)res_service;
+	cpu_cfg.dmsid = NPS_DEFAULT_MSID;
+	cpu_cfg.cs = 1;
+	iowrite32be(cpu_cfg.value, nps_mtm_reg_addr(cpu, NPS_MTM_CPU_CFG));
+}
+
+static void eznps_ipi_send(int cpu)
+{
+	struct global_id gid;
+	struct {
+		union {
+			struct {
+				u32 num:8, cluster:8, core:8, thread:8;
+			};
+			u32 value;
+		};
+	} ipi;
+
+	gid.value = cpu;
+	ipi.thread = get_thread(gid);
+	ipi.core = gid.core;
+	ipi.cluster = nps_cluster_logic_to_phys(gid.cluster);
+	ipi.num = NPS_IPI_IRQ;
+
+	__asm__ __volatile__(
+	"	mov r3, %0\n"
+	"	.word %1\n"
+	:
+	: "r"(ipi.value), "i"(CTOP_INST_ASRI_0_R3)
+	: "r3");
+}
+
+static void eznps_init_per_cpu(int cpu)
+{
+	smp_ipi_irq_setup(cpu, NPS_IPI_IRQ);
+
+	eznps_init_core(cpu);
+	mtm_enable_core(cpu);
+}
+
+static void eznps_ipi_clear(int irq)
+{
+	write_aux_reg(CTOP_AUX_IACK, 1 << irq);
+}
+
+struct plat_smp_ops plat_smp_ops = {
+	.info		= smp_cpuinfo_buf,
+	.init_early_smp	= eznps_init_cpumasks,
+	.cpu_kick	= eznps_smp_wakeup_cpu,
+	.ipi_send	= eznps_ipi_send,
+	.init_per_cpu	= eznps_init_per_cpu,
+	.ipi_clear	= eznps_ipi_clear,
+};
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 31cc2f2..10b27b9 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -48,9 +48,29 @@
 				     <1 24>,
 				     <1 25>,
 				     <1 26>,
+				     /* dma channel 11-14 share one irq */
 				     <1 27>,
+				     <1 27>,
+				     <1 27>,
+				     <1 27>,
+				     /* unused shared irq for all channels */
 				     <1 28>;
-
+			interrupt-names = "dma0",
+					  "dma1",
+					  "dma2",
+					  "dma3",
+					  "dma4",
+					  "dma5",
+					  "dma6",
+					  "dma7",
+					  "dma8",
+					  "dma9",
+					  "dma10",
+					  "dma11",
+					  "dma12",
+					  "dma13",
+					  "dma14",
+					  "dma-shared-all";
 			#dma-cells = <1>;
 			brcm,dma-channel-mask = <0x7f35>;
 		};
diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora.dts b/arch/arm/boot/dts/imx6q-apalis-ixora.dts
index 8e3e146..8e67ca2 100644
--- a/arch/arm/boot/dts/imx6q-apalis-ixora.dts
+++ b/arch/arm/boot/dts/imx6q-apalis-ixora.dts
@@ -219,8 +219,9 @@
 };
 
 &pcie {
-	/* active-low meaning opposite of regular PERST# active-low polarity */
-	reset-gpio = <&gpio1 28 GPIO_ACTIVE_LOW>;
+	/* active-high meaning opposite of regular PERST# active-low polarity */
+	reset-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+	reset-gpio-active-high;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi
index 1ada714..886dbf2 100644
--- a/arch/arm/boot/dts/imx6qp.dtsi
+++ b/arch/arm/boot/dts/imx6qp.dtsi
@@ -82,5 +82,8 @@
 				      "ldb_di0", "ldb_di1", "prg";
 		};
 
+		pcie: pcie@0x01000000 {
+			compatible = "fsl,imx6qp-pcie", "snps,dw-pcie";
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 14594ce..449acf0 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -117,7 +117,7 @@
 			chan_priority = <1>;
 			block_size = <0xfff>;
 			dma-masters = <2>;
-			data_width = <3 3>;
+			data-width = <8 8>;
 		};
 
 		dma@eb000000 {
@@ -133,7 +133,7 @@
 			chan_allocation_order = <1>;
 			chan_priority = <1>;
 			block_size = <0xfff>;
-			data_width = <3 3>;
+			data-width = <8 8>;
 		};
 
 		fsmc: flash@b0000000 {
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 6ad1ced..02283eb 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -118,7 +118,7 @@
 
 #define arch_setup_dma_ops arch_setup_dma_ops
 extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			       struct iommu_ops *iommu, bool coherent);
+			       const struct iommu_ops *iommu, bool coherent);
 
 #define arch_teardown_dma_ops arch_teardown_dma_ops
 extern void arch_teardown_dma_ops(struct device *dev);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 738d5ee..0df6b1f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -187,6 +187,7 @@
 struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 	u32 hvc_exit_stat;
 	u64 wfe_exit_stat;
@@ -290,6 +291,7 @@
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index f17a8d4..f9a6506 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -47,6 +47,7 @@
 #include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
+#include <asm/stage2_pgtable.h>
 
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
@@ -106,14 +107,16 @@
 	clean_pte_table(pte);
 }
 
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
-	pte_val(*pte) |= L_PTE_S2_RDWR;
+	pte_val(pte) |= L_PTE_S2_RDWR;
+	return pte;
 }
 
-static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
 {
-	pmd_val(*pmd) |= L_PMD_S2_RDWR;
+	pmd_val(pmd) |= L_PMD_S2_RDWR;
+	return pmd;
 }
 
 static inline void kvm_set_s2pte_readonly(pte_t *pte)
@@ -136,22 +139,6 @@
 	return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
 }
 
-
-/* Open coded p*d_addr_end that can deal with 64bit addresses */
-#define kvm_pgd_addr_end(addr, end)					\
-({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
-	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
-})
-
-#define kvm_pud_addr_end(addr,end)		(end)
-
-#define kvm_pmd_addr_end(addr, end)					\
-({	u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK;		\
-	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
-})
-
-#define kvm_pgd_index(addr)			pgd_index(addr)
-
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
@@ -160,19 +147,11 @@
 
 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(kvm, pudp) (0)
+#define kvm_pud_table_empty(kvm, pudp) false
 
-#define KVM_PREALLOC_LEVEL	0
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
-{
-	return kvm->arch.pgd;
-}
-
-static inline unsigned int kvm_get_hwpgd_size(void)
-{
-	return PTRS_PER_S2_PGD * sizeof(pgd_t);
-}
+#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define hyp_pud_table_empty(pudp) false
 
 struct kvm;
 
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
new file mode 100644
index 0000000..460d616
--- /dev/null
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_S2_PGTABLE_H_
+#define __ARM_S2_PGTABLE_H_
+
+#define stage2_pgd_none(pgd)			pgd_none(pgd)
+#define stage2_pgd_clear(pgd)			pgd_clear(pgd)
+#define stage2_pgd_present(pgd)			pgd_present(pgd)
+#define stage2_pgd_populate(pgd, pud)		pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(pgd, address)		pud_offset(pgd, address)
+#define stage2_pud_free(pud)			pud_free(NULL, pud)
+
+#define stage2_pud_none(pud)			pud_none(pud)
+#define stage2_pud_clear(pud)			pud_clear(pud)
+#define stage2_pud_present(pud)			pud_present(pud)
+#define stage2_pud_populate(pud, pmd)		pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(pud, address)		pmd_offset(pud, address)
+#define stage2_pmd_free(pmd)			pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(pud)			pud_huge(pud)
+
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
+
+	return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pud_addr_end(addr, end)		(end)
+
+static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
+
+	return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pgd_index(addr)				pgd_index(addr)
+
+#define stage2_pte_table_empty(ptep)			kvm_page_empty(ptep)
+#define stage2_pmd_table_empty(pmdp)			kvm_page_empty(pmdp)
+#define stage2_pud_table_empty(pudp)			false
+
+#endif	/* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9ef013d..237d5d8 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -444,7 +444,7 @@
 	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
+	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
 	kvm->arch.vttbr = pgd_phys | vmid;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index be30212..45c43ae 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -43,11 +43,9 @@
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
-#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
-#define kvm_pud_huge(_x)	pud_huge(_x)
-
 #define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
 #define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
 
@@ -69,14 +67,7 @@
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
-	/*
-	 * This function also gets called when dealing with HYP page
-	 * tables. As HYP doesn't have an associated struct kvm (and
-	 * the HYP page tables are fairly static), we don't do
-	 * anything there.
-	 */
-	if (kvm)
-		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
 /*
@@ -115,7 +106,7 @@
  */
 static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
 {
-	if (!kvm_pmd_huge(*pmd))
+	if (!pmd_thp_or_huge(*pmd))
 		return;
 
 	pmd_clear(pmd);
@@ -155,29 +146,29 @@
 	return p;
 }
 
-static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
-	pgd_clear(pgd);
+	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
+	stage2_pgd_clear(pgd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pud_free(NULL, pud_table);
+	stage2_pud_free(pud_table);
 	put_page(virt_to_page(pgd));
 }
 
-static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	VM_BUG_ON(pud_huge(*pud));
-	pud_clear(pud);
+	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
+	VM_BUG_ON(stage2_pud_huge(*pud));
+	stage2_pud_clear(pud);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pmd_free(NULL, pmd_table);
+	stage2_pmd_free(pmd_table);
 	put_page(virt_to_page(pud));
 }
 
-static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	VM_BUG_ON(kvm_pmd_huge(*pmd));
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
 	pmd_clear(pmd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
 	pte_free_kernel(NULL, pte_table);
@@ -204,7 +195,7 @@
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the IO subsystem will never hit in the cache.
  */
-static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t start_addr = addr;
@@ -226,21 +217,21 @@
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(kvm, start_pte))
-		clear_pmd_entry(kvm, pmd, start_addr);
+	if (stage2_pte_table_empty(start_pte))
+		clear_stage2_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
 	pmd_t *pmd, *start_pmd;
 
-	start_pmd = pmd = pmd_offset(pud, addr);
+	start_pmd = pmd = stage2_pmd_offset(pud, addr);
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
 				pmd_t old_pmd = *pmd;
 
 				pmd_clear(pmd);
@@ -250,57 +241,64 @@
 
 				put_page(virt_to_page(pmd));
 			} else {
-				unmap_ptes(kvm, pmd, addr, next);
+				unmap_stage2_ptes(kvm, pmd, addr, next);
 			}
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(kvm, start_pmd))
-		clear_pud_entry(kvm, pud, start_addr);
+	if (stage2_pmd_table_empty(start_pmd))
+		clear_stage2_pud_entry(kvm, pud, start_addr);
 }
 
-static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
 	pud_t *pud, *start_pud;
 
-	start_pud = pud = pud_offset(pgd, addr);
+	start_pud = pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud)) {
 				pud_t old_pud = *pud;
 
-				pud_clear(pud);
+				stage2_pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
-
 				kvm_flush_dcache_pud(old_pud);
-
 				put_page(virt_to_page(pud));
 			} else {
-				unmap_pmds(kvm, pud, addr, next);
+				unmap_stage2_pmds(kvm, pud, addr, next);
 			}
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(kvm, start_pud))
-		clear_pgd_entry(kvm, pgd, start_addr);
+	if (stage2_pud_table_empty(start_pud))
+		clear_stage2_pgd_entry(kvm, pgd, start_addr);
 }
 
-
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-			phys_addr_t start, u64 size)
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm:   The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size:  The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 {
 	pgd_t *pgd;
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
-		next = kvm_pgd_addr_end(addr, end);
-		if (!pgd_none(*pgd))
-			unmap_puds(kvm, pgd, addr, next);
+		next = stage2_pgd_addr_end(addr, end);
+		if (!stage2_pgd_none(*pgd))
+			unmap_stage2_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -322,11 +320,11 @@
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(pud, addr);
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd))
+			if (pmd_thp_or_huge(*pmd))
 				kvm_flush_dcache_pmd(*pmd);
 			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
@@ -340,11 +338,11 @@
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = pud_offset(pgd, addr);
+	pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
-			if (pud_huge(*pud))
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud))
 				kvm_flush_dcache_pud(*pud);
 			else
 				stage2_flush_pmds(kvm, pud, addr, next);
@@ -360,9 +358,9 @@
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
-		next = kvm_pgd_addr_end(addr, end);
+		next = stage2_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
@@ -391,6 +389,100 @@
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
+static void clear_hyp_pgd_entry(pgd_t *pgd)
+{
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
+	pgd_clear(pgd);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
+}
+
+static void clear_hyp_pud_entry(pud_t *pud)
+{
+	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
+	pud_clear(pud);
+	pmd_free(NULL, pmd_table);
+	put_page(virt_to_page(pud));
+}
+
+static void clear_hyp_pmd_entry(pmd_t *pmd)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
+	pmd_clear(pmd);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (hyp_pte_table_empty(start_pte))
+		clear_hyp_pmd_entry(pmd);
+}
+
+static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pmd_t *pmd, *start_pmd;
+
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		/* Hyp doesn't use huge pmds */
+		if (!pmd_none(*pmd))
+			unmap_hyp_ptes(pmd, addr, next);
+	} while (pmd++, addr = next, addr != end);
+
+	if (hyp_pmd_table_empty(start_pmd))
+		clear_hyp_pud_entry(pud);
+}
+
+static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		/* Hyp doesn't use huge puds */
+		if (!pud_none(*pud))
+			unmap_hyp_pmds(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+
+	if (hyp_pud_table_empty(start_pud))
+		clear_hyp_pgd_entry(pgd);
+}
+
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	/*
+	 * We don't unmap anything from HYP, except at the hyp tear down.
+	 * Hence, we don't have to invalidate the TLBs here.
+	 */
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (!pgd_none(*pgd))
+			unmap_hyp_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
 /**
  * free_boot_hyp_pgd - free HYP boot page tables
  *
@@ -401,14 +493,14 @@
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
-		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd)
-		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
@@ -433,9 +525,9 @@
 
 	if (hyp_pgd) {
 		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
@@ -645,20 +737,6 @@
 				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
-/* Free the HW pgd, one page at a time */
-static void kvm_free_hwpgd(void *hwpgd)
-{
-	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
-}
-
-/* Allocate the HW PGD, making sure that each page gets its own refcount */
-static void *kvm_alloc_hwpgd(void)
-{
-	unsigned int size = kvm_get_hwpgd_size();
-
-	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
-}
-
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -673,81 +751,22 @@
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
 	pgd_t *pgd;
-	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
-	hwpgd = kvm_alloc_hwpgd();
-	if (!hwpgd)
+	/* Allocate the HW PGD, making sure that each page gets its own refcount */
+	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
+	if (!pgd)
 		return -ENOMEM;
 
-	/* When the kernel uses more levels of page tables than the
-	 * guest, we allocate a fake PGD and pre-populate it to point
-	 * to the next-level page table, which will be the real
-	 * initial page table pointed to by the VTTBR.
-	 *
-	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
-	 * the PMD and the kernel will use folded pud.
-	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
-	 * pages.
-	 */
-	if (KVM_PREALLOC_LEVEL > 0) {
-		int i;
-
-		/*
-		 * Allocate fake pgd for the page table manipulation macros to
-		 * work.  This is not used by the hardware and we have no
-		 * alignment requirement for this allocation.
-		 */
-		pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
-				GFP_KERNEL | __GFP_ZERO);
-
-		if (!pgd) {
-			kvm_free_hwpgd(hwpgd);
-			return -ENOMEM;
-		}
-
-		/* Plug the HW PGD into the fake one. */
-		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-			if (KVM_PREALLOC_LEVEL == 1)
-				pgd_populate(NULL, pgd + i,
-					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
-			else if (KVM_PREALLOC_LEVEL == 2)
-				pud_populate(NULL, pud_offset(pgd, 0) + i,
-					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-		}
-	} else {
-		/*
-		 * Allocate actual first-level Stage-2 page table used by the
-		 * hardware for Stage-2 page table walks.
-		 */
-		pgd = (pgd_t *)hwpgd;
-	}
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
 }
 
-/**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
- * @kvm:   The VM pointer
- * @start: The intermediate physical base address of the range to unmap
- * @size:  The size of the area to unmap
- *
- * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
- * be called while holding mmu_lock (unless for freeing the stage2 pgd before
- * destroying the VM), otherwise another faulting VCPU may come in and mess
- * with things behind our backs.
- */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
-{
-	unmap_range(kvm, kvm->arch.pgd, start, size);
-}
-
 static void stage2_unmap_memslot(struct kvm *kvm,
 				 struct kvm_memory_slot *memslot)
 {
@@ -830,10 +849,8 @@
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(kvm->arch.pgd);
-
+	/* Free the HW pgd, one page at a time */
+	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
 	kvm->arch.pgd = NULL;
 }
 
@@ -843,16 +860,16 @@
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
-	if (WARN_ON(pgd_none(*pgd))) {
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	if (WARN_ON(stage2_pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
 		pud = mmu_memory_cache_alloc(cache);
-		pgd_populate(NULL, pgd, pud);
+		stage2_pgd_populate(pgd, pud);
 		get_page(virt_to_page(pgd));
 	}
 
-	return pud_offset(pgd, addr);
+	return stage2_pud_offset(pgd, addr);
 }
 
 static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -862,15 +879,15 @@
 	pmd_t *pmd;
 
 	pud = stage2_get_pud(kvm, cache, addr);
-	if (pud_none(*pud)) {
+	if (stage2_pud_none(*pud)) {
 		if (!cache)
 			return NULL;
 		pmd = mmu_memory_cache_alloc(cache);
-		pud_populate(NULL, pud, pmd);
+		stage2_pud_populate(pud, pmd);
 		get_page(virt_to_page(pud));
 	}
 
-	return pmd_offset(pud, addr);
+	return stage2_pmd_offset(pud, addr);
 }
 
 static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
@@ -893,11 +910,14 @@
 	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
 
 	old_pmd = *pmd;
-	kvm_set_pmd(pmd, *new_pmd);
-	if (pmd_present(old_pmd))
+	if (pmd_present(old_pmd)) {
+		pmd_clear(pmd);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	else
+	} else {
 		get_page(virt_to_page(pmd));
+	}
+
+	kvm_set_pmd(pmd, *new_pmd);
 	return 0;
 }
 
@@ -946,15 +966,38 @@
 
 	/* Create 2nd stage page table mapping - Level 3 */
 	old_pte = *pte;
-	kvm_set_pte(pte, *new_pte);
-	if (pte_present(old_pte))
+	if (pte_present(old_pte)) {
+		kvm_set_pte(pte, __pte(0));
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	else
+	} else {
 		get_page(virt_to_page(pte));
+	}
 
+	kvm_set_pte(pte, *new_pte);
 	return 0;
 }
 
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	if (pte_young(*pte)) {
+		*pte = pte_mkold(*pte);
+		return 1;
+	}
+	return 0;
+}
+#else
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	return __ptep_test_and_clear_young(pte);
+}
+#endif
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
+}
+
 /**
  * kvm_phys_addr_ioremap - map a device range to guest IPA
  *
@@ -978,7 +1021,7 @@
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
 		if (writable)
-			kvm_set_s2pte_writable(&pte);
+			pte = kvm_s2pte_mkwrite(pte);
 
 		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
 						KVM_NR_MEM_OBJS);
@@ -1078,12 +1121,12 @@
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(pud, addr);
 
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
 				if (!kvm_s2pmd_readonly(pmd))
 					kvm_set_s2pmd_readonly(pmd);
 			} else {
@@ -1106,12 +1149,12 @@
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = pud_offset(pgd, addr);
+	pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
 			/* TODO:PUD not supported, revisit later if supported */
-			BUG_ON(kvm_pud_huge(*pud));
+			BUG_ON(stage2_pud_huge(*pud));
 			stage2_wp_pmds(pud, addr, next);
 		}
 	} while (pud++, addr = next, addr != end);
@@ -1128,7 +1171,7 @@
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1140,8 +1183,8 @@
 		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
 			cond_resched_lock(&kvm->mmu_lock);
 
-		next = kvm_pgd_addr_end(addr, end);
-		if (pgd_present(*pgd))
+		next = stage2_pgd_addr_end(addr, end);
+		if (stage2_pgd_present(*pgd))
 			stage2_wp_puds(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
@@ -1320,7 +1363,7 @@
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
 		if (writable) {
-			kvm_set_s2pmd_writable(&new_pmd);
+			new_pmd = kvm_s2pmd_mkwrite(new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
@@ -1329,7 +1372,7 @@
 		pte_t new_pte = pfn_pte(pfn, mem_type);
 
 		if (writable) {
-			kvm_set_s2pte_writable(&new_pte);
+			new_pte = kvm_s2pte_mkwrite(new_pte);
 			kvm_set_pfn_dirty(pfn);
 			mark_page_dirty(kvm, gfn);
 		}
@@ -1348,6 +1391,8 @@
  * Resolve the access fault by making the page young again.
  * Note that because the faulting entry is guaranteed not to be
  * cached in the TLB, we don't need to invalidate anything.
+ * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
+ * so there is no need for atomic (pte|pmd)_mkyoung operations.
  */
 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 {
@@ -1364,7 +1409,7 @@
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		goto out;
 
-	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
+	if (pmd_thp_or_huge(*pmd)) {	/* THP, HugeTLB */
 		*pmd = pmd_mkyoung(*pmd);
 		pfn = pmd_pfn(*pmd);
 		pfn_valid = true;
@@ -1588,25 +1633,14 @@
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
-	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
-		if (pmd_young(*pmd)) {
-			*pmd = pmd_mkold(*pmd);
-			return 1;
-		}
-
-		return 0;
-	}
+	if (pmd_thp_or_huge(*pmd))	/* THP, HugeTLB */
+		return stage2_pmdp_test_and_clear_young(pmd);
 
 	pte = pte_offset_kernel(pmd, gpa);
 	if (pte_none(*pte))
 		return 0;
 
-	if (pte_young(*pte)) {
-		*pte = pte_mkold(*pte);	/* Just a page... */
-		return 1;
-	}
-
-	return 0;
+	return stage2_ptep_test_and_clear_young(pte);
 }
 
 static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
@@ -1618,7 +1652,7 @@
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
-	if (kvm_pmd_huge(*pmd))		/* THP, HugeTLB */
+	if (pmd_thp_or_huge(*pmd))		/* THP, HugeTLB */
 		return pmd_young(*pmd);
 
 	pte = pte_offset_kernel(pmd, gpa);
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c941e93..5c2ca06 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2215,7 +2215,7 @@
 }
 
 static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
-				    struct iommu_ops *iommu)
+				    const struct iommu_ops *iommu)
 {
 	struct dma_iommu_mapping *mapping;
 
@@ -2253,7 +2253,7 @@
 #else
 
 static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
-				    struct iommu_ops *iommu)
+				    const struct iommu_ops *iommu)
 {
 	return false;
 }
@@ -2270,7 +2270,7 @@
 }
 
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			struct iommu_ops *iommu, bool coherent)
+			const struct iommu_ops *iommu, bool coherent)
 {
 	struct dma_map_ops *dma_ops;
 
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index ba437f0..7dbea6c 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -48,7 +48,7 @@
 }
 
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			struct iommu_ops *iommu, bool coherent);
+			const struct iommu_ops *iommu, bool coherent);
 #define arch_setup_dma_ops	arch_setup_dma_ops
 
 #ifdef CONFIG_IOMMU_DMA
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1b3dc9df..2cdb6b5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -85,32 +85,37 @@
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1	((1 << 31) | (1 << 23))
-#define TCR_EL2_TBI	(1 << 20)
-#define TCR_EL2_PS	(7 << 16)
-#define TCR_EL2_PS_40B	(2 << 16)
-#define TCR_EL2_TG0	(1 << 14)
-#define TCR_EL2_SH0	(3 << 12)
-#define TCR_EL2_ORGN0	(3 << 10)
-#define TCR_EL2_IRGN0	(3 << 8)
-#define TCR_EL2_T0SZ	0x3f
-#define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
-			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+#define TCR_EL2_RES1		((1 << 31) | (1 << 23))
+#define TCR_EL2_TBI		(1 << 20)
+#define TCR_EL2_PS_SHIFT	16
+#define TCR_EL2_PS_MASK		(7 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_PS_40B		(2 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_TG0_MASK	TCR_TG0_MASK
+#define TCR_EL2_SH0_MASK	TCR_SH0_MASK
+#define TCR_EL2_ORGN0_MASK	TCR_ORGN0_MASK
+#define TCR_EL2_IRGN0_MASK	TCR_IRGN0_MASK
+#define TCR_EL2_T0SZ_MASK	0x3f
+#define TCR_EL2_MASK	(TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
+			 TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
 
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_RES1		(1 << 31)
-#define VTCR_EL2_PS_MASK	(7 << 16)
-#define VTCR_EL2_TG0_MASK	(1 << 14)
-#define VTCR_EL2_TG0_4K		(0 << 14)
-#define VTCR_EL2_TG0_64K	(1 << 14)
-#define VTCR_EL2_SH0_MASK	(3 << 12)
-#define VTCR_EL2_SH0_INNER	(3 << 12)
-#define VTCR_EL2_ORGN0_MASK	(3 << 10)
-#define VTCR_EL2_ORGN0_WBWA	(1 << 10)
-#define VTCR_EL2_IRGN0_MASK	(3 << 8)
-#define VTCR_EL2_IRGN0_WBWA	(1 << 8)
-#define VTCR_EL2_SL0_MASK	(3 << 6)
-#define VTCR_EL2_SL0_LVL1	(1 << 6)
+#define VTCR_EL2_HD		(1 << 22)
+#define VTCR_EL2_HA		(1 << 21)
+#define VTCR_EL2_PS_MASK	TCR_EL2_PS_MASK
+#define VTCR_EL2_TG0_MASK	TCR_TG0_MASK
+#define VTCR_EL2_TG0_4K		TCR_TG0_4K
+#define VTCR_EL2_TG0_16K	TCR_TG0_16K
+#define VTCR_EL2_TG0_64K	TCR_TG0_64K
+#define VTCR_EL2_SH0_MASK	TCR_SH0_MASK
+#define VTCR_EL2_SH0_INNER	TCR_SH0_INNER
+#define VTCR_EL2_ORGN0_MASK	TCR_ORGN0_MASK
+#define VTCR_EL2_ORGN0_WBWA	TCR_ORGN0_WBWA
+#define VTCR_EL2_IRGN0_MASK	TCR_IRGN0_MASK
+#define VTCR_EL2_IRGN0_WBWA	TCR_IRGN0_WBWA
+#define VTCR_EL2_SL0_SHIFT	6
+#define VTCR_EL2_SL0_MASK	(3 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_LVL1	(1 << VTCR_EL2_SL0_SHIFT)
 #define VTCR_EL2_T0SZ_MASK	0x3f
 #define VTCR_EL2_T0SZ_40B	24
 #define VTCR_EL2_VS_SHIFT	19
@@ -126,35 +131,45 @@
  * (see hyp-init.S).
  *
  * Note that when using 4K pages, we concatenate two first level page tables
- * together.
+ * together. With 16K pages, we concatenate 16 first level page tables.
  *
  * The magic numbers used for VTTBR_X in this patch can be found in Tables
  * D4-23 and D4-25 in ARM DDI 0487A.b.
  */
+
+#define VTCR_EL2_T0SZ_IPA	VTCR_EL2_T0SZ_40B
+#define VTCR_EL2_COMMON_BITS	(VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
+
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits input  (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
-				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
-#else
+#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC		38
+#elif defined(CONFIG_ARM64_16K_PAGES)
 /*
  * Stage2 translation configuration:
- * 40bits input  (T0SZ = 24)
+ * 16kB pages (TG0 = 2)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC		42
+#else	/* 4K */
+/*
+ * Stage2 translation configuration:
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
-				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_TGRAN_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC		37
 #endif
 
+#define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
+#define VTTBR_X				(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
 #define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (UL(48))
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 90a8d23..e63d23b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -295,6 +295,7 @@
 struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 	u32 hvc_exit_stat;
 	u64 wfe_exit_stat;
@@ -369,6 +370,7 @@
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e8d39d4..f05ac27 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -45,18 +45,6 @@
  */
 #define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
 
-/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
- * levels in addition to the PGD and potentially the PUD which are
- * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
- * tables use one level of tables less than the kernel.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define KVM_MMU_CACHE_MIN_PAGES	1
-#else
-#define KVM_MMU_CACHE_MIN_PAGES	2
-#endif
-
 #ifdef __ASSEMBLY__
 
 #include <asm/alternative.h>
@@ -91,6 +79,8 @@
 #define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
 
+#include <asm/stage2_pgtable.h>
+
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
@@ -122,19 +112,32 @@
 static inline void kvm_clean_pte(pte_t *pte) {}
 static inline void kvm_clean_pte_entry(pte_t *pte) {}
 
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
-	pte_val(*pte) |= PTE_S2_RDWR;
+	pte_val(pte) |= PTE_S2_RDWR;
+	return pte;
 }
 
-static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
 {
-	pmd_val(*pmd) |= PMD_S2_RDWR;
+	pmd_val(pmd) |= PMD_S2_RDWR;
+	return pmd;
 }
 
 static inline void kvm_set_s2pte_readonly(pte_t *pte)
 {
-	pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
+	pteval_t pteval;
+	unsigned long tmp;
+
+	asm volatile("//	kvm_set_s2pte_readonly\n"
+	"	prfm	pstl1strm, %2\n"
+	"1:	ldxr	%0, %2\n"
+	"	and	%0, %0, %3		// clear PTE_S2_RDWR\n"
+	"	orr	%0, %0, %4		// set PTE_S2_RDONLY\n"
+	"	stxr	%w1, %0, %2\n"
+	"	cbnz	%w1, 1b\n"
+	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
+	: "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
 }
 
 static inline bool kvm_s2pte_readonly(pte_t *pte)
@@ -144,69 +147,12 @@
 
 static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
 {
-	pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY;
+	kvm_set_s2pte_readonly((pte_t *)pmd);
 }
 
 static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 {
-	return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY;
-}
-
-
-#define kvm_pgd_addr_end(addr, end)	pgd_addr_end(addr, end)
-#define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
-#define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
-
-/*
- * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
- * the entire IPA input range with a single pgd entry, and we would only need
- * one pgd entry.  Note that in this case, the pgd is actually not used by
- * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
- * structure for the kernel pgtable macros to work.
- */
-#if PGDIR_SHIFT > KVM_PHYS_SHIFT
-#define PTRS_PER_S2_PGD_SHIFT	0
-#else
-#define PTRS_PER_S2_PGD_SHIFT	(KVM_PHYS_SHIFT - PGDIR_SHIFT)
-#endif
-#define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
-
-#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
-
-/*
- * If we are concatenating first level stage-2 page tables, we would have less
- * than or equal to 16 pointers in the fake PGD, because that's what the
- * architecture allows.  In this case, (4 - CONFIG_PGTABLE_LEVELS)
- * represents the first level for the host, and we add 1 to go to the next
- * level (which uses contatenation) for the stage-2 tables.
- */
-#if PTRS_PER_S2_PGD <= 16
-#define KVM_PREALLOC_LEVEL	(4 - CONFIG_PGTABLE_LEVELS + 1)
-#else
-#define KVM_PREALLOC_LEVEL	(0)
-#endif
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
-{
-	pgd_t *pgd = kvm->arch.pgd;
-	pud_t *pud;
-
-	if (KVM_PREALLOC_LEVEL == 0)
-		return pgd;
-
-	pud = pud_offset(pgd, 0);
-	if (KVM_PREALLOC_LEVEL == 1)
-		return pud;
-
-	BUG_ON(KVM_PREALLOC_LEVEL != 2);
-	return pmd_offset(pud, 0);
-}
-
-static inline unsigned int kvm_get_hwpgd_size(void)
-{
-	if (KVM_PREALLOC_LEVEL > 0)
-		return PTRS_PER_S2_PGD * PAGE_SIZE;
-	return PTRS_PER_S2_PGD * sizeof(pgd_t);
+	return kvm_s2pte_readonly((pte_t *)pmd);
 }
 
 static inline bool kvm_page_empty(void *ptr)
@@ -215,23 +161,20 @@
 	return page_count(ptr_page) == 1;
 }
 
-#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
 
 #ifdef __PAGETABLE_PMD_FOLDED
-#define kvm_pmd_table_empty(kvm, pmdp) (0)
+#define hyp_pmd_table_empty(pmdp) (0)
 #else
-#define kvm_pmd_table_empty(kvm, pmdp) \
-	(kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
+#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
 #endif
 
 #ifdef __PAGETABLE_PUD_FOLDED
-#define kvm_pud_table_empty(kvm, pudp) (0)
+#define hyp_pud_table_empty(pudp) (0)
 #else
-#define kvm_pud_table_empty(kvm, pudp) \
-	(kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
+#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp)
 #endif
 
-
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 9786f77..2813748 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -207,23 +207,69 @@
 #define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
 #define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
 #define TCR_TxSZ_WIDTH		6
-#define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
-#define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
-#define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
-#define TCR_IRGN_WBnWA		((UL(3) << 8) | (UL(3) << 24))
-#define TCR_IRGN_MASK		((UL(3) << 8) | (UL(3) << 24))
-#define TCR_ORGN_NC		((UL(0) << 10) | (UL(0) << 26))
-#define TCR_ORGN_WBWA		((UL(1) << 10) | (UL(1) << 26))
-#define TCR_ORGN_WT		((UL(2) << 10) | (UL(2) << 26))
-#define TCR_ORGN_WBnWA		((UL(3) << 10) | (UL(3) << 26))
-#define TCR_ORGN_MASK		((UL(3) << 10) | (UL(3) << 26))
-#define TCR_SHARED		((UL(3) << 12) | (UL(3) << 28))
-#define TCR_TG0_4K		(UL(0) << 14)
-#define TCR_TG0_64K		(UL(1) << 14)
-#define TCR_TG0_16K		(UL(2) << 14)
-#define TCR_TG1_16K		(UL(1) << 30)
-#define TCR_TG1_4K		(UL(2) << 30)
-#define TCR_TG1_64K		(UL(3) << 30)
+
+#define TCR_IRGN0_SHIFT		8
+#define TCR_IRGN0_MASK		(UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC		(UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA		(UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT		(UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA		(UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_IRGN1_SHIFT		24
+#define TCR_IRGN1_MASK		(UL(3) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_NC		(UL(0) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WBWA		(UL(1) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WT		(UL(2) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WBnWA		(UL(3) << TCR_IRGN1_SHIFT)
+
+#define TCR_IRGN_NC		(TCR_IRGN0_NC | TCR_IRGN1_NC)
+#define TCR_IRGN_WBWA		(TCR_IRGN0_WBWA | TCR_IRGN1_WBWA)
+#define TCR_IRGN_WT		(TCR_IRGN0_WT | TCR_IRGN1_WT)
+#define TCR_IRGN_WBnWA		(TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA)
+#define TCR_IRGN_MASK		(TCR_IRGN0_MASK | TCR_IRGN1_MASK)
+
+
+#define TCR_ORGN0_SHIFT		10
+#define TCR_ORGN0_MASK		(UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC		(UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA		(UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT		(UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA		(UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_ORGN1_SHIFT		26
+#define TCR_ORGN1_MASK		(UL(3) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_NC		(UL(0) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WBWA		(UL(1) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WT		(UL(2) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WBnWA		(UL(3) << TCR_ORGN1_SHIFT)
+
+#define TCR_ORGN_NC		(TCR_ORGN0_NC | TCR_ORGN1_NC)
+#define TCR_ORGN_WBWA		(TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)
+#define TCR_ORGN_WT		(TCR_ORGN0_WT | TCR_ORGN1_WT)
+#define TCR_ORGN_WBnWA		(TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA)
+#define TCR_ORGN_MASK		(TCR_ORGN0_MASK | TCR_ORGN1_MASK)
+
+#define TCR_SH0_SHIFT		12
+#define TCR_SH0_MASK		(UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER		(UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_SH1_SHIFT		28
+#define TCR_SH1_MASK		(UL(3) << TCR_SH1_SHIFT)
+#define TCR_SH1_INNER		(UL(3) << TCR_SH1_SHIFT)
+#define TCR_SHARED		(TCR_SH0_INNER | TCR_SH1_INNER)
+
+#define TCR_TG0_SHIFT		14
+#define TCR_TG0_MASK		(UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K		(UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K		(UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K		(UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_TG1_SHIFT		30
+#define TCR_TG1_MASK		(UL(3) << TCR_TG1_SHIFT)
+#define TCR_TG1_16K		(UL(1) << TCR_TG1_SHIFT)
+#define TCR_TG1_4K		(UL(2) << TCR_TG1_SHIFT)
+#define TCR_TG1_64K		(UL(3) << TCR_TG1_SHIFT)
+
 #define TCR_ASID16		(UL(1) << 36)
 #define TCR_TBI0		(UL(1) << 37)
 #define TCR_HA			(UL(1) << 39)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index a7ac45a..46472a9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -300,6 +300,8 @@
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mknotpresent(pmd)	(__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
 
+#define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
+
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 
@@ -549,14 +551,12 @@
  * Atomic pte/pmd modifications.
  */
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long address,
-					    pte_t *ptep)
+static inline int __ptep_test_and_clear_young(pte_t *ptep)
 {
 	pteval_t pteval;
 	unsigned int tmp, res;
 
-	asm volatile("//	ptep_test_and_clear_young\n"
+	asm volatile("//	__ptep_test_and_clear_young\n"
 	"	prfm	pstl1strm, %2\n"
 	"1:	ldxr	%0, %2\n"
 	"	ubfx	%w3, %w0, %5, #1	// extract PTE_AF (young)\n"
@@ -569,6 +569,13 @@
 	return res;
 }
 
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long address,
+					    pte_t *ptep)
+{
+	return __ptep_test_and_clear_young(ptep);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
new file mode 100644
index 0000000..2656a0f
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
+#define __ARM64_S2_PGTABLE_NOPMD_H_
+
+#include <asm/stage2_pgtable-nopud.h>
+
+#define __S2_PGTABLE_PMD_FOLDED
+
+#define S2_PMD_SHIFT		S2_PUD_SHIFT
+#define S2_PTRS_PER_PMD		1
+#define S2_PMD_SIZE		(1UL << S2_PMD_SHIFT)
+#define S2_PMD_MASK		(~(S2_PMD_SIZE-1))
+
+#define stage2_pud_none(pud)			(0)
+#define stage2_pud_present(pud)			(1)
+#define stage2_pud_clear(pud)			do { } while (0)
+#define stage2_pud_populate(pud, pmd)		do { } while (0)
+#define stage2_pmd_offset(pud, address)		((pmd_t *)(pud))
+
+#define stage2_pmd_free(pmd)			do { } while (0)
+
+#define stage2_pmd_addr_end(addr, end)		(end)
+
+#define stage2_pud_huge(pud)			(0)
+#define stage2_pmd_table_empty(pmdp)		(0)
+
+#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
new file mode 100644
index 0000000..5ee87b5
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopud.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
+#define __ARM64_S2_PGTABLE_NOPUD_H_
+
+#define __S2_PGTABLE_PUD_FOLDED
+
+#define S2_PUD_SHIFT		S2_PGDIR_SHIFT
+#define S2_PTRS_PER_PUD		1
+#define S2_PUD_SIZE		(_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_MASK		(~(S2_PUD_SIZE-1))
+
+#define stage2_pgd_none(pgd)			(0)
+#define stage2_pgd_present(pgd)			(1)
+#define stage2_pgd_clear(pgd)			do { } while (0)
+#define stage2_pgd_populate(pgd, pud)	do { } while (0)
+
+#define stage2_pud_offset(pgd, address)		((pud_t *)(pgd))
+
+#define stage2_pud_free(x)			do { } while (0)
+
+#define stage2_pud_addr_end(addr, end)		(end)
+#define stage2_pud_table_empty(pmdp)		(0)
+
+#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
new file mode 100644
index 0000000..8b68099
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_H_
+#define __ARM64_S2_PGTABLE_H_
+
+#include <asm/pgtable.h>
+
+/*
+ * The hardware supports concatenation of up to 16 tables at stage2 entry level
+ * and we use the feature whenever possible.
+ *
+ * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3).
+ * On arm64, the smallest PAGE_SIZE supported is 4k, which means
+ *             (PAGE_SHIFT - 3) > 4 holds for all page sizes.
+ * This implies, the total number of page table levels at stage2 expected
+ * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
+ * in normal translations (e.g., stage1), since we cannot have another level in
+ * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
+ */
+#define STAGE2_PGTABLE_LEVELS		ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
+
+/*
+ * With all the supported VA_BITs and 40bit guest IPA, the following condition
+ * is always true:
+ *
+ *       STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
+ *
+ * We base our stage-2 page table walker helpers on this assumption and
+ * fall back to using the host version of the helper wherever possible.
+ * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
+ * to using the host version, since it is guaranteed it is not folded at host.
+ *
+ * If the condition breaks in the future, we can rearrange the host level
+ * definitions and reuse them for stage2. Till then...
+ */
+#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
+#error "Unsupported combination of guest IPA and host VA_BITS."
+#endif
+
+/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
+#define S2_PGDIR_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
+#define S2_PGDIR_SIZE			(_AC(1, UL) << S2_PGDIR_SHIFT)
+#define S2_PGDIR_MASK			(~(S2_PGDIR_SIZE - 1))
+
+/*
+ * The number of PTRS across all concatenated stage2 tables given by the
+ * number of bits resolved at the initial level.
+ */
+#define PTRS_PER_S2_PGD			(1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
+
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
+ * levels in addition to the PGD.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES		(STAGE2_PGTABLE_LEVELS - 1)
+
+
+#if STAGE2_PGTABLE_LEVELS > 3
+
+#define S2_PUD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
+#define S2_PUD_SIZE			(_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_MASK			(~(S2_PUD_SIZE - 1))
+
+#define stage2_pgd_none(pgd)				pgd_none(pgd)
+#define stage2_pgd_clear(pgd)				pgd_clear(pgd)
+#define stage2_pgd_present(pgd)				pgd_present(pgd)
+#define stage2_pgd_populate(pgd, pud)			pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(pgd, address)			pud_offset(pgd, address)
+#define stage2_pud_free(pud)				pud_free(NULL, pud)
+
+#define stage2_pud_table_empty(pudp)			kvm_page_empty(pudp)
+
+static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+
+	return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif		/* STAGE2_PGTABLE_LEVELS > 3 */
+
+
+#if STAGE2_PGTABLE_LEVELS > 2
+
+#define S2_PMD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
+#define S2_PMD_SIZE			(_AC(1, UL) << S2_PMD_SHIFT)
+#define S2_PMD_MASK			(~(S2_PMD_SIZE - 1))
+
+#define stage2_pud_none(pud)				pud_none(pud)
+#define stage2_pud_clear(pud)				pud_clear(pud)
+#define stage2_pud_present(pud)				pud_present(pud)
+#define stage2_pud_populate(pud, pmd)			pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(pud, address)			pmd_offset(pud, address)
+#define stage2_pmd_free(pmd)				pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(pud)				pud_huge(pud)
+#define stage2_pmd_table_empty(pmdp)			kvm_page_empty(pmdp)
+
+static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
+
+	return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif		/* STAGE2_PGTABLE_LEVELS > 2 */
+
+#define stage2_pte_table_empty(ptep)			kvm_page_empty(ptep)
+
+#if STAGE2_PGTABLE_LEVELS == 2
+#include <asm/stage2_pgtable-nopmd.h>
+#elif STAGE2_PGTABLE_LEVELS == 3
+#include <asm/stage2_pgtable-nopud.h>
+#endif
+
+
+#define stage2_pgd_index(addr)				(((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
+static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
+
+	return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif	/* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index de7450d..aa2e34e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -22,7 +22,6 @@
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	depends on OF
-	depends on !ARM64_16K_PAGES
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index bcbe761..b81f409 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -66,6 +66,14 @@
 	val |= 64 - (parange > 40 ? 40 : parange);
 
 	/*
+	 * Check the availability of Hardware Access Flag / Dirty Bit
+	 * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
+	 */
+	tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
+	if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
+		val |= VTCR_EL2_HA;
+
+	/*
 	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
 	 * bit in VTCR_EL2.
 	 */
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index fd8b942..c566ec8 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -562,8 +562,8 @@
 		struct page **pages;
 		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
 
-		pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
-					flush_page);
+		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
+					handle, flush_page);
 		if (!pages)
 			return NULL;
 
@@ -947,13 +947,13 @@
 #else
 
 static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-				  struct iommu_ops *iommu)
+				  const struct iommu_ops *iommu)
 { }
 
 #endif  /* CONFIG_IOMMU_DMA */
 
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			struct iommu_ops *iommu, bool coherent)
+			const struct iommu_ops *iommu, bool coherent)
 {
 	if (!dev->archdata.dma_ops)
 		dev->archdata.dma_ops = &swiotlb_dma_ops;
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index bf445aa..00d6dcc 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1365,8 +1365,8 @@
 	slave->dma_dev = &dw_dmac0_device.dev;
 	slave->src_id = 0;
 	slave->dst_id = 1;
-	slave->src_master = 1;
-	slave->dst_master = 0;
+	slave->m_master = 1;
+	slave->p_master = 0;
 
 	data->dma_slave = slave;
 	data->dma_filter = at32_mci_dma_filter;
@@ -2061,16 +2061,16 @@
 	if (flags & AC97C_CAPTURE) {
 		rx_dws->dma_dev = &dw_dmac0_device.dev;
 		rx_dws->src_id = 3;
-		rx_dws->src_master = 0;
-		rx_dws->dst_master = 1;
+		rx_dws->m_master = 0;
+		rx_dws->p_master = 1;
 	}
 
 	/* Check if DMA slave interface for playback should be configured. */
 	if (flags & AC97C_PLAYBACK) {
 		tx_dws->dma_dev = &dw_dmac0_device.dev;
 		tx_dws->dst_id = 4;
-		tx_dws->src_master = 0;
-		tx_dws->dst_master = 1;
+		tx_dws->m_master = 0;
+		tx_dws->p_master = 1;
 	}
 
 	if (platform_device_add_data(pdev, data,
@@ -2141,8 +2141,8 @@
 
 	dws->dma_dev = &dw_dmac0_device.dev;
 	dws->dst_id = 2;
-	dws->src_master = 0;
-	dws->dst_master = 1;
+	dws->m_master = 0;
+	dws->p_master = 1;
 
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct atmel_abdac_pdata)))
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 2889412..07a4e32 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1904,13 +1904,10 @@
 				      unsigned long action,
 				      void *hcpu)
 {
-	int hotcpu = (unsigned long) hcpu;
-
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		smp_call_function_single(hotcpu, ia64_mca_cmc_vector_adjust,
-					 NULL, 0);
+		ia64_mca_cmc_vector_adjust(NULL);
 		break;
 	}
 	return NOTIFY_OK;
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 6f7d4a4..77edd68 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -548,6 +548,7 @@
 			return;
 		}
 		switch (vector) {
+		      default:
 		      case 29:
 			siginfo.si_code = TRAP_HWBKPT;
 #ifdef CONFIG_ITANIUM
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index e7ae608..7f0d316 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1378,6 +1378,7 @@
 	 * extract the instruction from the bundle given the slot number
 	 */
 	switch (ipsr->ri) {
+	      default:
 	      case 0: u.l = (bundle[0] >>  5); break;
 	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
 	      case 2: u.l = (bundle[1] >> 23); break;
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
index 231234c..c31fe63 100644
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -426,7 +426,6 @@
 void
 sn_acpi_slot_fixup(struct pci_dev *dev)
 {
-	void __iomem *addr;
 	struct pcidev_info *pcidev_info = NULL;
 	struct sn_irq_info *sn_irq_info = NULL;
 	struct resource *res;
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index c15a41e..d63809a 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -151,7 +151,7 @@
 {
 	int idx;
 	struct resource *res;
-	unsigned long addr, size;
+	unsigned long size;
 	struct pcidev_info *pcidev_info;
 	struct sn_irq_info *sn_irq_info;
 	int status;
@@ -186,7 +186,7 @@
 			continue;
 
 		res->start = pcidev_info->pdi_pio_mapped_addr[idx];
-		res->end = addr + size;
+		res->end = res->start + size;
 
 		/*
 		 * if it's already in the device structure, remove it before
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index f9c8d9f..c98dc96 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -54,7 +54,7 @@
 			       volatile unsigned long *, unsigned long,
 			       volatile unsigned long *, unsigned long);
 void
-sn2_ptc_deadlock_recovery(short *, short, short, int,
+sn2_ptc_deadlock_recovery(nodemask_t, short, short, int,
 			  volatile unsigned long *, unsigned long,
 			  volatile unsigned long *, unsigned long);
 
@@ -169,7 +169,7 @@
 	int use_cpu_ptcga;
 	volatile unsigned long *ptc0, *ptc1;
 	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
-	short nasids[MAX_NUMNODES], nix;
+	short nix;
 	nodemask_t nodes_flushed;
 	int active, max_active, deadlock, flush_opt = sn2_flush_opt;
 
@@ -218,9 +218,7 @@
 	}
 
 	itc = ia64_get_itc();
-	nix = 0;
-	for_each_node_mask(cnode, nodes_flushed)
-		nasids[nix++] = cnodeid_to_nasid(cnode);
+	nix = nodes_weight(nodes_flushed);
 
 	rr_value = (mm->context << 3) | REGION_NUMBER(start);
 
@@ -270,8 +268,10 @@
 			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
 		deadlock = 0;
 		active = 0;
-		for (ibegin = 0, i = 0; i < nix; i++) {
-			nasid = nasids[i];
+		ibegin = 0;
+		i = 0;
+		for_each_node_mask(cnode, nodes_flushed) {
+			nasid = cnodeid_to_nasid(cnode);
 			if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
 				ia64_ptcga(start, nbits << 2);
 				ia64_srlz_i();
@@ -286,13 +286,14 @@
 				if ((deadlock = wait_piowc())) {
 					if (flush_opt == 1)
 						goto done;
-					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+					sn2_ptc_deadlock_recovery(nodes_flushed, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
 					if (reset_max_active_on_deadlock())
 						max_active = 1;
 				}
 				active = 0;
 				ibegin = i + 1;
 			}
+			i++;
 		}
 		start += (1UL << nbits);
 	} while (start < end);
@@ -327,11 +328,12 @@
  */
 
 void
-sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+sn2_ptc_deadlock_recovery(nodemask_t nodes, short ib, short ie, int mynasid,
 			  volatile unsigned long *ptc0, unsigned long data0,
 			  volatile unsigned long *ptc1, unsigned long data1)
 {
 	short nasid, i;
+	int cnode;
 	unsigned long *piows, zeroval, n;
 
 	__this_cpu_inc(ptcstats.deadlocks);
@@ -339,17 +341,26 @@
 	piows = (unsigned long *) pda->pio_write_status_addr;
 	zeroval = pda->pio_write_status_val;
 
+	i = 0;
+	for_each_node_mask(cnode, nodes) {
+		if (i < ib)
+			goto next;
 
-	for (i=ib; i <= ie; i++) {
-		nasid = nasids[i];
+		if (i > ie)
+			break;
+
+		nasid = cnodeid_to_nasid(cnode);
 		if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
-			continue;
+			goto next;
+
 		ptc0 = CHANGE_NASID(nasid, ptc0);
 		if (ptc1)
 			ptc1 = CHANGE_NASID(nasid, ptc1);
 
 		n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
 		__this_cpu_add(ptcstats.deadlocks2, n);
+next:
+		i++;
 	}
 
 }
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d2ac117..5663f41 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -62,6 +62,7 @@
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select GENERIC_TIME_VSYSCALL
 	select ARCH_CLOCKSOURCE_DATA
+	select HANDLE_DOMAIN_IRQ
 
 menu "Machine selection"
 
@@ -137,7 +138,7 @@
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_MIPS16
-	select SYS_SUPPORTS_ZBOOT
+	select SYS_SUPPORTS_ZBOOT_UART_PROM
 	select USE_OF
 	help
 	  Support for the Atheros AR71XX/AR724X/AR913X SoCs.
@@ -194,6 +195,7 @@
 	select GPIOLIB
 	select LEDS_GPIO_REGISTER
 	select BCM47XX_NVRAM
+	select BCM47XX_SPROM
 	help
 	 Support for BCM47XX based boards
 
@@ -471,6 +473,7 @@
 	select SYS_SUPPORTS_MULTITHREADING
 	select SYS_SUPPORTS_SMARTMIPS
 	select SYS_SUPPORTS_ZBOOT
+	select SYS_SUPPORTS_RELOCATABLE
 	select USE_OF
 	select ZONE_DMA32 if 64BIT
 	select BUILTIN_DTB
@@ -505,6 +508,7 @@
 	select MIPS_MSC
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_HAS_CPU_MIPS32_R2
+	select SYS_HAS_CPU_MIPS32_R6
 	select SYS_HAS_CPU_MIPS64_R1
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_32BIT_KERNEL
@@ -514,6 +518,7 @@
 	select SYS_SUPPORTS_SMARTMIPS
 	select SYS_SUPPORTS_MICROMIPS
 	select SYS_SUPPORTS_MIPS16
+	select SYS_SUPPORTS_RELOCATABLE
 	select USB_EHCI_BIG_ENDIAN_DESC
 	select USB_EHCI_BIG_ENDIAN_MMIO
 	select USE_OF
@@ -1153,6 +1158,13 @@
 config HOLES_IN_ZONE
 	bool
 
+config SYS_SUPPORTS_RELOCATABLE
+	bool
+	help
+	 Selected if the platform supports relocating the kernel.
+	 The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF
+	 to allow access to command line and entropy sources.
+
 #
 # Endianness selection.  Sufficiently obscure so many users don't know what to
 # answer,so we try hard to limit the available choices.  Also the use of a
@@ -1340,11 +1352,30 @@
 	select CPU_SUPPORTS_HUGEPAGES
 	select WEAK_ORDERING
 	select WEAK_REORDERING_BEYOND_LLSC
+	select MIPS_PGD_C0_CONTEXT
 	select GPIOLIB
 	help
 		The Loongson 3 processor implements the MIPS64R2 instruction
 		set with many extensions.
 
+config LOONGSON3_ENHANCEMENT
+	bool "New Loongson 3 CPU Enhancements"
+	default n
+	select CPU_MIPSR2
+	select CPU_HAS_PREFETCH
+	depends on CPU_LOONGSON3
+	help
+	  New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A
+	  R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as
+	  FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User
+	  Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer),
+	  Fast TLB refill support, etc.
+
+	  This option enables those enhancements which are not probed at run
+	  time. If you want a generic kernel to run on all Loongson 3 machines,
+	  please say 'N' here. If you want a high-performance kernel to run on
+	  new Loongson 3 machines only, please say 'Y' here.
+
 config CPU_LOONGSON2E
 	bool "Loongson 2E"
 	depends on SYS_HAS_CPU_LOONGSON2E
@@ -1373,6 +1404,8 @@
 	bool "Loongson 1B"
 	depends on SYS_HAS_CPU_LOONGSON1B
 	select CPU_LOONGSON1
+	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select LEDS_GPIO_REGISTER
 	help
 	  The Loongson 1B is a 32-bit SoC, which implements the MIPS32
 	  release 2 instruction set.
@@ -1671,6 +1704,7 @@
 	select CPU_HAS_PREFETCH
 	select CPU_MIPSR2
 	select CPU_SUPPORTS_HUGEPAGES
+	select MIPS_ASID_BITS_VARIABLE
 	help
 	  Netlogic Microsystems XLP processors.
 endchoice
@@ -1796,6 +1830,7 @@
 	select MIPS_L1_CACHE_SHIFT_6
 	select SYS_SUPPORTS_SMP
 	select SYS_SUPPORTS_HOTPLUG_CPU
+	select CPU_HAS_RIXI
 
 config CPU_BMIPS5000
 	bool
@@ -1803,10 +1838,12 @@
 	select MIPS_L1_CACHE_SHIFT_7
 	select SYS_SUPPORTS_SMP
 	select SYS_SUPPORTS_HOTPLUG_CPU
+	select CPU_HAS_RIXI
 
 config SYS_HAS_CPU_LOONGSON3
 	bool
 	select CPU_SUPPORTS_CPUFREQ
+	select CPU_HAS_RIXI
 
 config SYS_HAS_CPU_LOONGSON2E
 	bool
@@ -1959,11 +1996,15 @@
 config CPU_MIPSR2
 	bool
 	default y if CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_CAVIUM_OCTEON
+	select CPU_HAS_RIXI
 	select MIPS_SPRAM
 
 config CPU_MIPSR6
 	bool
 	default y if CPU_MIPS32_R6 || CPU_MIPS64_R6
+	select CPU_HAS_RIXI
+	select HAVE_ARCH_BITREVERSE
+	select MIPS_ASID_BITS_VARIABLE
 	select MIPS_SPRAM
 
 config EVA
@@ -1997,7 +2038,7 @@
 #
 config HARDWARE_WATCHPOINTS
        bool
-       default y if CPU_MIPSR1 || CPU_MIPSR2
+       default y if CPU_MIPSR1 || CPU_MIPSR2 || CPU_MIPSR6
 
 menu "Kernel type"
 
@@ -2040,6 +2081,16 @@
 	  emulation when determining guest CPU Frequency. Instead, the guest's
 	  timer frequency is specified directly.
 
+config MIPS_VA_BITS_48
+	bool "48 bits virtual memory"
+	depends on 64BIT
+	help
+	  Support at least 48 bits of application virtual memory.
+	  Default is 40 bits or less, depending on the CPU.
+	  This option results in a small memory overhead for page tables.
+	  This option is only supported with 16k and 64k page sizes.
+	  If unsure, say N.
+
 choice
 	prompt "Kernel page size"
 	default PAGE_SIZE_4KB
@@ -2047,6 +2098,7 @@
 config PAGE_SIZE_4KB
 	bool "4kB"
 	depends on !CPU_LOONGSON2 && !CPU_LOONGSON3
+	depends on !MIPS_VA_BITS_48
 	help
 	 This option select the standard 4kB Linux page size.  On some
 	 R3000-family processors this is the only available page size.  Using
@@ -2056,6 +2108,7 @@
 config PAGE_SIZE_8KB
 	bool "8kB"
 	depends on CPU_R8000 || CPU_CAVIUM_OCTEON
+	depends on !MIPS_VA_BITS_48
 	help
 	  Using 8kB page size will result in higher performance kernel at
 	  the price of higher memory consumption.  This option is available
@@ -2074,6 +2127,7 @@
 config PAGE_SIZE_32KB
 	bool "32kB"
 	depends on CPU_CAVIUM_OCTEON
+	depends on !MIPS_VA_BITS_48
 	help
 	  Using 32kB page size will result in higher performance kernel at
 	  the price of higher memory consumption.  This option is available
@@ -2278,7 +2332,7 @@
 
 config MIPS_CPS
 	bool "MIPS Coherent Processing System support"
-	depends on SYS_SUPPORTS_MIPS_CPS && !CPU_MIPSR6
+	depends on SYS_SUPPORTS_MIPS_CPS
 	select MIPS_CM
 	select MIPS_CPC
 	select MIPS_CPS_PM if HOTPLUG_CPU
@@ -2369,6 +2423,9 @@
 config XKS01
 	bool
 
+config CPU_HAS_RIXI
+	bool
+
 #
 # Vectored interrupt mode is an R2 feature
 #
@@ -2399,6 +2456,21 @@
 config CPU_R4400_WORKAROUNDS
 	bool
 
+config MIPS_ASID_SHIFT
+	int
+	default 6 if CPU_R3000 || CPU_TX39XX
+	default 4 if CPU_R8000
+	default 0
+
+config MIPS_ASID_BITS
+	int
+	default 0 if MIPS_ASID_BITS_VARIABLE
+	default 6 if CPU_R3000 || CPU_TX39XX
+	default 8
+
+config MIPS_ASID_BITS_VARIABLE
+	bool
+
 #
 # - Highmem only makes sense for the 32-bit kernel.
 # - The current highmem code will only work properly on physically indexed
@@ -2468,6 +2540,61 @@
 config SYS_SUPPORTS_NUMA
 	bool
 
+config RELOCATABLE
+	bool "Relocatable kernel"
+	depends on SYS_SUPPORTS_RELOCATABLE && (CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_MIPS32_R6 || CPU_MIPS64_R6)
+	help
+	  This builds a kernel image that retains relocation information
+	  so it can be loaded someplace besides the default 1MB.
+	  The relocations make the kernel binary about 15% larger,
+	  but are discarded at runtime
+
+config RELOCATION_TABLE_SIZE
+	hex "Relocation table size"
+	depends on RELOCATABLE
+	range 0x0 0x01000000
+	default "0x00100000"
+	---help---
+	  A table of relocation data will be appended to the kernel binary
+	  and parsed at boot to fix up the relocated kernel.
+
+	  This option allows the amount of space reserved for the table to be
+	  adjusted, although the default of 1MB should be OK in most cases.
+
+	  The build will fail and a valid size suggested if this is too small.
+
+	  If unsure, leave at the default value.
+
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image"
+	depends on RELOCATABLE
+	---help---
+	   Randomizes the physical and virtual address at which the
+	   kernel image is loaded, as a security feature that
+	   deters exploit attempts relying on knowledge of the location
+	   of kernel internals.
+
+	   Entropy is generated using any coprocessor 0 registers available.
+
+	   The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET.
+
+	   If unsure, say N.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+	hex "Maximum kASLR offset" if EXPERT
+	depends on RANDOMIZE_BASE
+	range 0x0 0x40000000 if EVA || 64BIT
+	range 0x0 0x08000000
+	default "0x01000000"
+	---help---
+	  When kASLR is active, this provides the maximum offset that will
+	  be applied to the kernel image. It should be set according to the
+	  amount of physical RAM available in the target system minus
+	  PHYSICAL_START and must be a power of 2.
+
+	  This is limited by the size of KSEG0, 256MB on 32-bit or 1GB with
+	  EVA or 64-bit. The default is 16MB.
+
 config NODES_SHIFT
 	int
 	default "6"
@@ -2475,7 +2602,7 @@
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS && OPROFILE=n && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON3)
+	depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON3)
 	default y
 	help
 	  Enable hardware performance counter support for perf events. If
@@ -2808,6 +2935,10 @@
 
 	config MIPS_CMDLINE_FROM_BOOTLOADER
 		bool "Bootloader kernel arguments if available"
+
+	config MIPS_CMDLINE_BUILTIN_EXTEND
+		depends on CMDLINE_BOOL
+		bool "Extend builtin kernel arguments with bootloader arguments"
 endchoice
 
 endmenu
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index e78d60d..efd7a9d 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -12,6 +12,9 @@
 # for "archclean" cleaning up for this architecture.
 #
 
+archscripts: scripts_basic
+	$(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
+
 KBUILD_DEFCONFIG := ip22_defconfig
 
 #
@@ -93,6 +96,10 @@
 KBUILD_AFLAGS_MODULE		+= -mlong-calls
 KBUILD_CFLAGS_MODULE		+= -mlong-calls
 
+ifeq ($(CONFIG_RELOCATABLE),y)
+LDFLAGS_vmlinux			+= --emit-relocs
+endif
+
 #
 # pass -msoft-float to GAS if it supports it.  However on newer binutils
 # (specifically newer than 2.24.51.20140728) we then also need to explicitly
@@ -193,6 +200,8 @@
 toolchain-msa				:= $(call cc-option-yn,$(mips-cflags) -mhard-float -mfp64 -Wa$(comma)-mmsa)
 cflags-$(toolchain-msa)			+= -DTOOLCHAIN_SUPPORTS_MSA
 endif
+toolchain-virt				:= $(call cc-option-yn,$(mips-cflags) -mvirt)
+cflags-$(toolchain-virt)		+= -DTOOLCHAIN_SUPPORTS_VIRT
 
 cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER)	+= -mcompact-branches=never
 cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL)	+= -mcompact-branches=optimal
@@ -310,6 +319,10 @@
 		$(bootvars-y) $@
 endif
 
+CMD_RELOCS = arch/mips/boot/tools/relocs
+quiet_cmd_relocs = RELOCS  $<
+      cmd_relocs = $(CMD_RELOCS) $<
+
 #
 # Some machines like the Indy need 32-bit ELF binaries for booting purposes.
 # Other need ECOFF, so we build a 32-bit ELF binary for them which we then
@@ -318,6 +331,11 @@
 quiet_cmd_32 = OBJCOPY $@
 	cmd_32 = $(OBJCOPY) -O $(32bit-bfd) $(OBJCOPYFLAGS) $< $@
 vmlinux.32: vmlinux
+ifeq ($(CONFIG_RELOCATABLE)$(CONFIG_64BIT),yy)
+# Currently, objcopy fails to handle the relocations in the elf64
+# So the relocs tool must be run here to remove them first
+	$(call cmd,relocs)
+endif
 	$(call cmd,32)
 
 #
@@ -333,6 +351,9 @@
 
 # boot
 $(boot-y): $(vmlinux-32) FORCE
+ifeq ($(CONFIG_RELOCATABLE)$(CONFIG_32BIT),yy)
+	$(call cmd,relocs)
+endif
 	$(Q)$(MAKE) $(build)=arch/mips/boot VMLINUX=$(vmlinux-32) \
 		$(bootvars-y) arch/mips/boot/$@
 
@@ -385,6 +406,7 @@
 archclean:
 	$(Q)$(MAKE) $(clean)=arch/mips/boot
 	$(Q)$(MAKE) $(clean)=arch/mips/boot/compressed
+	$(Q)$(MAKE) $(clean)=arch/mips/boot/tools
 	$(Q)$(MAKE) $(clean)=arch/mips/lasat
 
 define archhelp
diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index bd34f40..7ba7ea0 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c
@@ -1043,8 +1043,7 @@
 
 	/* Root of the Alchemy clock tree: external 12MHz crystal osc */
 	c = clk_register_fixed_rate(NULL, ALCHEMY_ROOT_CLK, NULL,
-					   CLK_IS_ROOT,
-					   ALCHEMY_ROOTCLK_RATE);
+					   0, ALCHEMY_ROOTCLK_RATE);
 	ERRCK(c)
 
 	/* CPU core clock */
diff --git a/arch/mips/ath79/Kconfig b/arch/mips/ath79/Kconfig
index 13c04cf..dfc6020 100644
--- a/arch/mips/ath79/Kconfig
+++ b/arch/mips/ath79/Kconfig
@@ -71,18 +71,6 @@
 	  Say 'Y' here if you want your kernel to support the
 	  Ubiquiti Networks XM (rev 1.0) board.
 
-choice
-	prompt "Build a DTB in the kernel"
-	optional
-	help
-	  Select a devicetree that should be built into the kernel.
-
-	config DTB_TL_WR1043ND_V1
-		bool "TL-WR1043ND Version 1"
-		select BUILTIN_DTB
-		select SOC_AR913X
-endchoice
-
 endmenu
 
 config SOC_AR71XX
diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index 618dfd7..2e73784 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c
@@ -18,17 +18,21 @@
 #include <linux/clk.h>
 #include <linux/clkdev.h>
 #include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <dt-bindings/clock/ath79-clk.h>
 
 #include <asm/div64.h>
 
 #include <asm/mach-ath79/ath79.h>
 #include <asm/mach-ath79/ar71xx_regs.h>
 #include "common.h"
+#include "machtypes.h"
 
 #define AR71XX_BASE_FREQ	40000000
 #define AR724X_BASE_FREQ	40000000
 
-static struct clk *clks[3];
+static struct clk *clks[ATH79_CLK_END];
 static struct clk_onecell_data clk_data = {
 	.clks = clks,
 	.clk_num = ARRAY_SIZE(clks),
@@ -40,7 +44,7 @@
 	struct clk *clk;
 	int err;
 
-	clk = clk_register_fixed_rate(NULL, id, NULL, CLK_IS_ROOT, rate);
+	clk = clk_register_fixed_rate(NULL, id, NULL, 0, rate);
 	if (!clk)
 		panic("failed to allocate %s clock structure", id);
 
@@ -78,59 +82,123 @@
 	ahb_rate = cpu_rate / div;
 
 	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	clks[ATH79_CLK_CPU] = ath79_add_sys_clkdev("cpu", cpu_rate);
+	clks[ATH79_CLK_DDR] = ath79_add_sys_clkdev("ddr", ddr_rate);
+	clks[ATH79_CLK_AHB] = ath79_add_sys_clkdev("ahb", ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ahb", NULL);
 	clk_add_alias("uart", NULL, "ahb", NULL);
 }
 
+static struct clk * __init ath79_reg_ffclk(const char *name,
+		const char *parent_name, unsigned int mult, unsigned int div)
+{
+	struct clk *clk;
+
+	clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div);
+	if (IS_ERR(clk))	/* failure is reported as ERR_PTR(), never NULL */
+		panic("failed to allocate %s clock structure", name);
+
+	return clk;
+}
+
+static void __init ar724x_clk_init(struct clk *ref_clk, void __iomem *pll_base)
+{
+	u32 pll;
+	u32 mult, div, ddr_div, ahb_div;
+
+	pll = __raw_readl(pll_base + AR724X_PLL_REG_CPU_CONFIG);
+
+	mult = ((pll >> AR724X_PLL_FB_SHIFT) & AR724X_PLL_FB_MASK);
+	div = ((pll >> AR724X_PLL_REF_DIV_SHIFT) & AR724X_PLL_REF_DIV_MASK) * 2;
+
+	ddr_div = ((pll >> AR724X_DDR_DIV_SHIFT) & AR724X_DDR_DIV_MASK) + 1;
+	ahb_div = (((pll >> AR724X_AHB_DIV_SHIFT) & AR724X_AHB_DIV_MASK) + 1) * 2;
+
+	clks[ATH79_CLK_CPU] = ath79_reg_ffclk("cpu", "ref", mult, div);
+	clks[ATH79_CLK_DDR] = ath79_reg_ffclk("ddr", "ref", mult, div * ddr_div);
+	clks[ATH79_CLK_AHB] = ath79_reg_ffclk("ahb", "ref", mult, div * ahb_div);
+}
+
 static void __init ar724x_clocks_init(void)
 {
-	unsigned long ref_rate;
-	unsigned long cpu_rate;
-	unsigned long ddr_rate;
-	unsigned long ahb_rate;
-	u32 pll;
-	u32 freq;
-	u32 div;
+	struct clk *ref_clk;
 
-	ref_rate = AR724X_BASE_FREQ;
-	pll = ath79_pll_rr(AR724X_PLL_REG_CPU_CONFIG);
+	ref_clk = ath79_add_sys_clkdev("ref", AR724X_BASE_FREQ);
 
-	div = ((pll >> AR724X_PLL_FB_SHIFT) & AR724X_PLL_FB_MASK);
-	freq = div * ref_rate;
+	ar724x_clk_init(ref_clk, ath79_pll_base);
 
-	div = ((pll >> AR724X_PLL_REF_DIV_SHIFT) & AR724X_PLL_REF_DIV_MASK) * 2;
-	freq /= div;
-
-	cpu_rate = freq;
-
-	div = ((pll >> AR724X_DDR_DIV_SHIFT) & AR724X_DDR_DIV_MASK) + 1;
-	ddr_rate = freq / div;
-
-	div = (((pll >> AR724X_AHB_DIV_SHIFT) & AR724X_AHB_DIV_MASK) + 1) * 2;
-	ahb_rate = cpu_rate / div;
-
-	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	/* just make happy plat_time_init() from arch/mips/ath79/setup.c */
+	clk_register_clkdev(clks[ATH79_CLK_CPU], "cpu", NULL);
+	clk_register_clkdev(clks[ATH79_CLK_DDR], "ddr", NULL);
+	clk_register_clkdev(clks[ATH79_CLK_AHB], "ahb", NULL);
 
 	clk_add_alias("wdt", NULL, "ahb", NULL);
 	clk_add_alias("uart", NULL, "ahb", NULL);
 }
 
+static void __init ar9330_clk_init(struct clk *ref_clk, void __iomem *pll_base)
+{
+	u32 clock_ctrl;
+	u32 ref_div;
+	u32 ninit_mul;
+	u32 out_div;
+
+	u32 cpu_div;
+	u32 ddr_div;
+	u32 ahb_div;
+
+	clock_ctrl = __raw_readl(pll_base + AR933X_PLL_CLOCK_CTRL_REG);
+	if (clock_ctrl & AR933X_PLL_CLOCK_CTRL_BYPASS) {
+		ref_div = 1;
+		ninit_mul = 1;
+		out_div = 1;
+
+		cpu_div = 1;
+		ddr_div = 1;
+		ahb_div = 1;
+	} else {
+		u32 cpu_config;
+		u32 t;
+
+		cpu_config = __raw_readl(pll_base + AR933X_PLL_CPU_CONFIG_REG);
+
+		t = (cpu_config >> AR933X_PLL_CPU_CONFIG_REFDIV_SHIFT) &
+		    AR933X_PLL_CPU_CONFIG_REFDIV_MASK;
+		ref_div = t;
+
+		ninit_mul = (cpu_config >> AR933X_PLL_CPU_CONFIG_NINT_SHIFT) &
+		    AR933X_PLL_CPU_CONFIG_NINT_MASK;
+
+		t = (cpu_config >> AR933X_PLL_CPU_CONFIG_OUTDIV_SHIFT) &
+		    AR933X_PLL_CPU_CONFIG_OUTDIV_MASK;
+		if (t == 0)
+			t = 1;
+
+		out_div = (1 << t);
+
+		cpu_div = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_CPU_DIV_SHIFT) &
+		     AR933X_PLL_CLOCK_CTRL_CPU_DIV_MASK) + 1;
+
+		ddr_div = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_DDR_DIV_SHIFT) &
+		      AR933X_PLL_CLOCK_CTRL_DDR_DIV_MASK) + 1;
+
+		ahb_div = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_AHB_DIV_SHIFT) &
+		     AR933X_PLL_CLOCK_CTRL_AHB_DIV_MASK) + 1;
+	}
+
+	clks[ATH79_CLK_CPU] = ath79_reg_ffclk("cpu", "ref",
+					ninit_mul, ref_div * out_div * cpu_div);
+	clks[ATH79_CLK_DDR] = ath79_reg_ffclk("ddr", "ref",
+					ninit_mul, ref_div * out_div * ddr_div);
+	clks[ATH79_CLK_AHB] = ath79_reg_ffclk("ahb", "ref",
+					ninit_mul, ref_div * out_div * ahb_div);
+}
+
 static void __init ar933x_clocks_init(void)
 {
+	struct clk *ref_clk;
 	unsigned long ref_rate;
-	unsigned long cpu_rate;
-	unsigned long ddr_rate;
-	unsigned long ahb_rate;
-	u32 clock_ctrl;
-	u32 cpu_config;
-	u32 freq;
 	u32 t;
 
 	t = ath79_reset_rr(AR933X_RESET_REG_BOOTSTRAP);
@@ -139,46 +207,14 @@
 	else
 		ref_rate = (25 * 1000 * 1000);
 
-	clock_ctrl = ath79_pll_rr(AR933X_PLL_CLOCK_CTRL_REG);
-	if (clock_ctrl & AR933X_PLL_CLOCK_CTRL_BYPASS) {
-		cpu_rate = ref_rate;
-		ahb_rate = ref_rate;
-		ddr_rate = ref_rate;
-	} else {
-		cpu_config = ath79_pll_rr(AR933X_PLL_CPU_CONFIG_REG);
+	ref_clk = ath79_add_sys_clkdev("ref", ref_rate);
 
-		t = (cpu_config >> AR933X_PLL_CPU_CONFIG_REFDIV_SHIFT) &
-		    AR933X_PLL_CPU_CONFIG_REFDIV_MASK;
-		freq = ref_rate / t;
+	ar9330_clk_init(ref_clk, ath79_pll_base);
 
-		t = (cpu_config >> AR933X_PLL_CPU_CONFIG_NINT_SHIFT) &
-		    AR933X_PLL_CPU_CONFIG_NINT_MASK;
-		freq *= t;
-
-		t = (cpu_config >> AR933X_PLL_CPU_CONFIG_OUTDIV_SHIFT) &
-		    AR933X_PLL_CPU_CONFIG_OUTDIV_MASK;
-		if (t == 0)
-			t = 1;
-
-		freq >>= t;
-
-		t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_CPU_DIV_SHIFT) &
-		     AR933X_PLL_CLOCK_CTRL_CPU_DIV_MASK) + 1;
-		cpu_rate = freq / t;
-
-		t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_DDR_DIV_SHIFT) &
-		      AR933X_PLL_CLOCK_CTRL_DDR_DIV_MASK) + 1;
-		ddr_rate = freq / t;
-
-		t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_AHB_DIV_SHIFT) &
-		     AR933X_PLL_CLOCK_CTRL_AHB_DIV_MASK) + 1;
-		ahb_rate = freq / t;
-	}
-
-	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	/* just make happy plat_time_init() from arch/mips/ath79/setup.c */
+	clk_register_clkdev(clks[ATH79_CLK_CPU], "cpu", NULL);
+	clk_register_clkdev(clks[ATH79_CLK_DDR], "ddr", NULL);
+	clk_register_clkdev(clks[ATH79_CLK_AHB], "ahb", NULL);
 
 	clk_add_alias("wdt", NULL, "ahb", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
@@ -310,9 +346,9 @@
 		ahb_rate = cpu_pll / (postdiv + 1);
 
 	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	clks[ATH79_CLK_CPU] = ath79_add_sys_clkdev("cpu", cpu_rate);
+	clks[ATH79_CLK_DDR] = ath79_add_sys_clkdev("ddr", ddr_rate);
+	clks[ATH79_CLK_AHB] = ath79_add_sys_clkdev("ahb", ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ref", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
@@ -397,9 +433,9 @@
 		ahb_rate = cpu_pll / (postdiv + 1);
 
 	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	clks[ATH79_CLK_CPU] = ath79_add_sys_clkdev("cpu", cpu_rate);
+	clks[ATH79_CLK_DDR] = ath79_add_sys_clkdev("ddr", ddr_rate);
+	clks[ATH79_CLK_AHB] = ath79_add_sys_clkdev("ahb", ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ref", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
@@ -419,8 +455,6 @@
 		qca955x_clocks_init();
 	else
 		BUG();
-
-	of_clk_init(NULL);
 }
 
 unsigned long __init
@@ -447,8 +481,49 @@
 
 CLK_OF_DECLARE(ar7100, "qca,ar7100-pll", ath79_clocks_init_dt);
 CLK_OF_DECLARE(ar7240, "qca,ar7240-pll", ath79_clocks_init_dt);
-CLK_OF_DECLARE(ar9130, "qca,ar9130-pll", ath79_clocks_init_dt);
-CLK_OF_DECLARE(ar9330, "qca,ar9330-pll", ath79_clocks_init_dt);
 CLK_OF_DECLARE(ar9340, "qca,ar9340-pll", ath79_clocks_init_dt);
 CLK_OF_DECLARE(ar9550, "qca,qca9550-pll", ath79_clocks_init_dt);
+
+static void __init ath79_clocks_init_dt_ng(struct device_node *np)
+{
+	struct clk *ref_clk;
+	void __iomem *pll_base;
+	const char *dnfn = of_node_full_name(np);
+
+	ref_clk = of_clk_get(np, 0);
+	if (IS_ERR(ref_clk)) {
+		pr_err("%s: of_clk_get failed\n", dnfn);
+		goto err;
+	}
+
+	pll_base = of_iomap(np, 0);
+	if (!pll_base) {
+		pr_err("%s: can't map pll registers\n", dnfn);
+		goto err_clk;
+	}
+
+	if (of_device_is_compatible(np, "qca,ar9130-pll"))
+		ar724x_clk_init(ref_clk, pll_base);
+	else if (of_device_is_compatible(np, "qca,ar9330-pll"))
+		ar9330_clk_init(ref_clk, pll_base);
+	else {
+		pr_err("%s: could not find any appropriate clk_init()\n", dnfn);
+		goto err_clk;
+	}
+
+	if (of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data)) {
+		pr_err("%s: could not register clk provider\n", dnfn);
+		goto err_clk;
+	}
+
+	return;
+
+err_clk:
+	clk_put(ref_clk);
+
+err:
+	return;
+}
+CLK_OF_DECLARE(ar9130_clk, "qca,ar9130-pll", ath79_clocks_init_dt_ng);
+CLK_OF_DECLARE(ar9330_clk, "qca,ar9330-pll", ath79_clocks_init_dt_ng);
 #endif
diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c
index 3cedd1f..d071a3a 100644
--- a/arch/mips/ath79/common.c
+++ b/arch/mips/ath79/common.c
@@ -46,12 +46,12 @@
 {
 	ath79_ddr_base = ioremap_nocache(AR71XX_DDR_CTRL_BASE,
 					 AR71XX_DDR_CTRL_SIZE);
-	if (soc_is_ar71xx() || soc_is_ar934x()) {
-		ath79_ddr_wb_flush_base = ath79_ddr_base + 0x9c;
-		ath79_ddr_pci_win_base = ath79_ddr_base + 0x7c;
-	} else {
+	if (soc_is_ar913x() || soc_is_ar724x() || soc_is_ar933x()) {
 		ath79_ddr_wb_flush_base = ath79_ddr_base + 0x7c;
 		ath79_ddr_pci_win_base = 0;
+	} else {
+		ath79_ddr_wb_flush_base = ath79_ddr_base + 0x9c;
+		ath79_ddr_pci_win_base = ath79_ddr_base + 0x7c;
 	}
 }
 EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init);
@@ -76,14 +76,14 @@
 {
 	BUG_ON(!ath79_ddr_pci_win_base);
 
-	__raw_writel(AR71XX_PCI_WIN0_OFFS, ath79_ddr_pci_win_base + 0);
-	__raw_writel(AR71XX_PCI_WIN1_OFFS, ath79_ddr_pci_win_base + 1);
-	__raw_writel(AR71XX_PCI_WIN2_OFFS, ath79_ddr_pci_win_base + 2);
-	__raw_writel(AR71XX_PCI_WIN3_OFFS, ath79_ddr_pci_win_base + 3);
-	__raw_writel(AR71XX_PCI_WIN4_OFFS, ath79_ddr_pci_win_base + 4);
-	__raw_writel(AR71XX_PCI_WIN5_OFFS, ath79_ddr_pci_win_base + 5);
-	__raw_writel(AR71XX_PCI_WIN6_OFFS, ath79_ddr_pci_win_base + 6);
-	__raw_writel(AR71XX_PCI_WIN7_OFFS, ath79_ddr_pci_win_base + 7);
+	__raw_writel(AR71XX_PCI_WIN0_OFFS, ath79_ddr_pci_win_base + 0x0);
+	__raw_writel(AR71XX_PCI_WIN1_OFFS, ath79_ddr_pci_win_base + 0x4);
+	__raw_writel(AR71XX_PCI_WIN2_OFFS, ath79_ddr_pci_win_base + 0x8);
+	__raw_writel(AR71XX_PCI_WIN3_OFFS, ath79_ddr_pci_win_base + 0xc);
+	__raw_writel(AR71XX_PCI_WIN4_OFFS, ath79_ddr_pci_win_base + 0x10);
+	__raw_writel(AR71XX_PCI_WIN5_OFFS, ath79_ddr_pci_win_base + 0x14);
+	__raw_writel(AR71XX_PCI_WIN6_OFFS, ath79_ddr_pci_win_base + 0x18);
+	__raw_writel(AR71XX_PCI_WIN7_OFFS, ath79_ddr_pci_win_base + 0x1c);
 }
 EXPORT_SYMBOL_GPL(ath79_ddr_set_pci_windows);
 
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index be451ee4a..7adab18 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -17,6 +17,7 @@
 #include <linux/bootmem.h>
 #include <linux/err.h>
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
 
@@ -203,26 +204,57 @@
 	fdt_start = fw_getenvl("fdt_start");
 	if (fdt_start)
 		__dt_setup_arch((void *)KSEG0ADDR(fdt_start));
-#ifdef CONFIG_BUILTIN_DTB
-	else
-		__dt_setup_arch(__dtb_start);
-#endif
+	else if (fw_arg0 == -2)
+		__dt_setup_arch((void *)KSEG0ADDR(fw_arg1));
 
-	ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE,
-					   AR71XX_RESET_SIZE);
-	ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE,
-					 AR71XX_PLL_SIZE);
-	ath79_detect_sys_type();
-	ath79_ddr_ctrl_init();
+	if (mips_machtype != ATH79_MACH_GENERIC_OF) {
+		ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE,
+						   AR71XX_RESET_SIZE);
+		ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE,
+						 AR71XX_PLL_SIZE);
+		ath79_detect_sys_type();
+		ath79_ddr_ctrl_init();
 
-	if (mips_machtype != ATH79_MACH_GENERIC_OF)
 		detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX);
 
-	_machine_restart = ath79_restart;
+		/* OF machines should use the reset driver */
+		_machine_restart = ath79_restart;
+	}
+
 	_machine_halt = ath79_halt;
 	pm_power_off = ath79_halt;
 }
 
+static void __init ath79_of_plat_time_init(void)
+{
+	struct device_node *np;
+	struct clk *clk;
+	unsigned long cpu_clk_rate;
+
+	of_clk_init(NULL);
+
+	np = of_get_cpu_node(0, NULL);
+	if (!np) {
+		pr_err("Failed to get CPU node\n");
+		return;
+	}
+
+	clk = of_clk_get(np, 0);
+	of_node_put(np);	/* drop the reference from of_get_cpu_node() */
+	if (IS_ERR(clk)) {
+		pr_err("Failed to get CPU clock: %ld\n", PTR_ERR(clk));
+		return;
+	}
+
+	cpu_clk_rate = clk_get_rate(clk);
+	pr_info("CPU clock: %lu.%03lu MHz\n",
+		cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000);
+
+	mips_hpt_frequency = cpu_clk_rate / 2;
+
+	clk_put(clk);
+}
+
 void __init plat_time_init(void)
 {
 	unsigned long cpu_clk_rate;
@@ -230,6 +262,11 @@
 	unsigned long ddr_clk_rate;
 	unsigned long ref_clk_rate;
 
+	if (IS_ENABLED(CONFIG_OF) && mips_machtype == ATH79_MACH_GENERIC_OF) {
+		ath79_of_plat_time_init();
+		return;
+	}
+
 	ath79_clocks_init();
 
 	cpu_clk_rate = ath79_get_sys_clk_rate("cpu");
diff --git a/arch/mips/bcm47xx/Makefile b/arch/mips/bcm47xx/Makefile
index 66bea4e..6d86150 100644
--- a/arch/mips/bcm47xx/Makefile
+++ b/arch/mips/bcm47xx/Makefile
@@ -3,5 +3,5 @@
 # under Linux.
 #
 
-obj-y				+= irq.o prom.o serial.o setup.o time.o sprom.o
+obj-y				+= irq.o prom.o serial.o setup.o time.o
 obj-y				+= board.o buttons.o leds.o workarounds.o
diff --git a/arch/mips/bcm47xx/bcm47xx_private.h b/arch/mips/bcm47xx/bcm47xx_private.h
index 41796be..0367ac7 100644
--- a/arch/mips/bcm47xx/bcm47xx_private.h
+++ b/arch/mips/bcm47xx/bcm47xx_private.h
@@ -10,9 +10,6 @@
 /* prom.c */
 void __init bcm47xx_prom_highmem_init(void);
 
-/* sprom.c */
-void bcm47xx_sprom_register_fallbacks(void);
-
 /* buttons.c */
 int __init bcm47xx_buttons_register(void);
 
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index c807e32..6054d49 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -28,6 +28,7 @@
 
 #include "bcm47xx_private.h"
 
+#include <linux/bcm47xx_sprom.h>
 #include <linux/export.h>
 #include <linux/types.h>
 #include <linux/ethtool.h>
@@ -151,7 +152,6 @@
 		pr_info("Using bcma bus\n");
 #ifdef CONFIG_BCM47XX_BCMA
 		bcm47xx_bus_type = BCM47XX_BUS_TYPE_BCMA;
-		bcm47xx_sprom_register_fallbacks();
 		bcm47xx_register_bcma();
 		bcm47xx_set_system_type(bcm47xx_bus.bcma.bus.chipinfo.id);
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/mips/bmips/Kconfig b/arch/mips/bmips/Kconfig
index e2c4fd6..264328d 100644
--- a/arch/mips/bmips/Kconfig
+++ b/arch/mips/bmips/Kconfig
@@ -21,6 +21,10 @@
 	bool "BCM93384WVG Viper CPU (EXPERIMENTAL)"
 	select BUILTIN_DTB
 
+config DT_BCM96358NB4SER
+	bool "BCM96358NB4SER"
+	select BUILTIN_DTB
+
 config DT_BCM96368MVWG
 	bool "BCM96368MVWG"
 	select BUILTIN_DTB
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index 3553528..f146d12 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -95,6 +95,15 @@
 		bcm63xx_fixup_cpu1();
 }
 
+static void bcm6358_quirks(void)
+{
+	/*
+	 * BCM6358 needs special handling for its shared TLB, so
+	 * disable SMP for now
+	 */
+	bmips_smp_enabled = 0;
+}
+
 static void bcm6368_quirks(void)
 {
 	bcm63xx_fixup_cpu1();
@@ -104,13 +113,16 @@
 	{ "brcm,bcm3384-viper",		&bcm3384_viper_quirks		},
 	{ "brcm,bcm33843-viper",	&bcm3384_viper_quirks		},
 	{ "brcm,bcm6328",		&bcm6328_quirks			},
+	{ "brcm,bcm6358",		&bcm6358_quirks			},
 	{ "brcm,bcm6368",		&bcm6368_quirks			},
 	{ "brcm,bcm63168",		&bcm6368_quirks			},
+	{ "brcm,bcm63268",		&bcm6368_quirks			},
 	{ },
 };
 
 void __init prom_init(void)
 {
+	bmips_cpu_setup();
 	register_bmips_smp_ops();
 }
 
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 309d2ad..90aca95 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -37,8 +37,13 @@
 vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART16550) += $(obj)/uart-16550.o
 vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART_PROM) += $(obj)/uart-prom.o
 vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY)		   += $(obj)/uart-alchemy.o
+vmlinuzobjs-$(CONFIG_ATH79)			   += $(obj)/uart-ath79.o
 endif
 
+extra-y += uart-ath79.c
+$(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c
+	$(call cmd,shipped)
+
 vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o $(obj)/bswapsi.o
 
 extra-y += ashldi3.c bswapsi.c
diff --git a/arch/mips/boot/dts/brcm/Makefile b/arch/mips/boot/dts/brcm/Makefile
index eabeb60..fda9d38 100644
--- a/arch/mips/boot/dts/brcm/Makefile
+++ b/arch/mips/boot/dts/brcm/Makefile
@@ -1,5 +1,6 @@
 dtb-$(CONFIG_DT_BCM93384WVG)		+= bcm93384wvg.dtb
 dtb-$(CONFIG_DT_BCM93384WVG_VIPER)	+= bcm93384wvg_viper.dtb
+dtb-$(CONFIG_DT_BCM96358NB4SER)		+= bcm96358nb4ser.dtb
 dtb-$(CONFIG_DT_BCM96368MVWG)		+= bcm96368mvwg.dtb
 dtb-$(CONFIG_DT_BCM9EJTAGPRB)		+= bcm9ejtagprb.dtb
 dtb-$(CONFIG_DT_BCM97125CBMB)		+= bcm97125cbmb.dtb
@@ -14,6 +15,7 @@
 dtb-$(CONFIG_DT_NONE)			+= \
 						bcm93384wvg.dtb		\
 						bcm93384wvg_viper.dtb	\
+						bcm96358nb4ser.dtb	\
 						bcm96368mvwg.dtb	\
 						bcm9ejtagprb.dtb	\
 						bcm97125cbmb.dtb	\
diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi
index 9d19236..5633b9d 100644
--- a/arch/mips/boot/dts/brcm/bcm6328.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi
@@ -23,7 +23,7 @@
 	};
 
 	clocks {
-		periph_clk: periph_clk {
+		periph_clk: periph-clk {
 			compatible = "fixed-clock";
 			#clock-cells = <0>;
 			clock-frequency = <50000000>;
@@ -31,11 +31,11 @@
 	};
 
 	aliases {
-		leds0 = &leds0;
-		uart0 = &uart0;
+		serial0 = &uart0;
+		serial1 = &uart1;
 	};
 
-	cpu_intc: cpu_intc {
+	cpu_intc: interrupt-controller {
 		#address-cells = <0>;
 		compatible = "mti,cpu-interrupt-controller";
 
@@ -50,16 +50,16 @@
 		compatible = "simple-bus";
 		ranges;
 
-		periph_intc: periph_intc@10000020 {
-			compatible = "brcm,bcm3380-l2-intc";
-			reg = <0x10000024 0x4 0x1000002c 0x4>,
-			      <0x10000020 0x4 0x10000028 0x4>;
+		periph_intc: interrupt-controller@10000020 {
+			compatible = "brcm,bcm6345-l1-intc";
+			reg = <0x10000020 0x10>,
+			      <0x10000030 0x10>;
 
 			interrupt-controller;
 			#interrupt-cells = <1>;
 
 			interrupt-parent = <&cpu_intc>;
-			interrupts = <2>;
+			interrupts = <2>, <3>;
 		};
 
 		uart0: serial@10000100 {
@@ -71,13 +71,22 @@
 			status = "disabled";
 		};
 
-		timer: timer@10000040 {
+		uart1: serial@10000120 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0x10000120 0x18>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <39>;
+			clocks = <&periph_clk>;
+			status = "disabled";
+		};
+
+		timer: syscon@10000040 {
 			compatible = "syscon";
 			reg = <0x10000040 0x2c>;
 			native-endian;
 		};
 
-		reboot {
+		reboot: syscon-reboot@10000068 {
 			compatible = "syscon-reboot";
 			regmap = <&timer>;
 			offset = <0x28>;
@@ -91,5 +100,24 @@
 			reg = <0x10000800 0x24>;
 			status = "disabled";
 		};
+
+		ehci: usb@10002500 {
+			compatible = "brcm,bcm6328-ehci", "generic-ehci";
+			reg = <0x10002500 0x100>;
+			big-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <42>;
+			status = "disabled";
+		};
+
+		ohci: usb@10002600 {
+			compatible = "brcm,bcm6328-ohci", "generic-ohci";
+			reg = <0x10002600 0x100>;
+			big-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <41>;
+			status = "disabled";
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm6358.dtsi b/arch/mips/boot/dts/brcm/bcm6358.dtsi
new file mode 100644
index 0000000..f9d8d39
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm6358.dtsi
@@ -0,0 +1,130 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm6358";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <150000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips4350";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips4350";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	clocks {
+		periph_clk: periph-clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+		};
+	};
+
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart1;
+	};
+
+	cpu_intc: interrupt-controller {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	ubus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges;
+
+		periph_cntl: syscon@fffe0000 {
+			compatible = "syscon";
+			reg = <0xfffe0000 0xc>;
+			native-endian;
+		};
+
+		reboot: syscon-reboot@fffe0008 {
+			compatible = "syscon-reboot";
+			regmap = <&periph_cntl>;
+			offset = <0x8>;
+			mask = <0x1>;
+		};
+
+		periph_intc: interrupt-controller@fffe000c {
+			compatible = "brcm,bcm6345-l1-intc";
+			reg = <0xfffe000c 0x8>,
+			      <0xfffe0038 0x8>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>, <3>;
+		};
+
+		leds0: led-controller@fffe00d0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "brcm,bcm6358-leds";
+			reg = <0xfffe00d0 0x8>;
+
+			status = "disabled";
+		};
+
+		uart0: serial@fffe0100 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0xfffe0100 0x18>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <2>;
+
+			clocks = <&periph_clk>;
+
+			status = "disabled";
+		};
+
+		uart1: serial@fffe0120 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0xfffe0120 0x18>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <3>;
+
+			clocks = <&periph_clk>;
+
+			status = "disabled";
+		};
+
+		ehci: usb@fffe1300 {
+			compatible = "brcm,bcm6358-ehci", "generic-ehci";
+			reg = <0xfffe1300 0x100>;
+			big-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <10>;
+			status = "disabled";
+		};
+
+		ohci: usb@fffe1400 {
+			compatible = "brcm,bcm6358-ohci", "generic-ohci";
+			reg = <0xfffe1400 0x100>;
+			big-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <5>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm6368.dtsi b/arch/mips/boot/dts/brcm/bcm6368.dtsi
index 1f6b9b5..d0e3a70 100644
--- a/arch/mips/boot/dts/brcm/bcm6368.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm6368.dtsi
@@ -20,11 +20,10 @@
 			device_type = "cpu";
 			reg = <1>;
 		};
-
 	};
 
 	clocks {
-		periph_clk: periph_clk {
+		periph_clk: periph-clk {
 			compatible = "fixed-clock";
 			#clock-cells = <0>;
 			clock-frequency = <50000000>;
@@ -32,11 +31,11 @@
 	};
 
 	aliases {
-		leds0 = &leds0;
-		uart0 = &uart0;
+		serial0 = &uart0;
+		serial1 = &uart1;
 	};
 
-	cpu_intc: cpu_intc {
+	cpu_intc: interrupt-controller {
 		#address-cells = <0>;
 		compatible = "mti,cpu-interrupt-controller";
 
@@ -64,16 +63,16 @@
 			mask = <0x1>;
 		};
 
-		periph_intc: periph_intc@10000020 {
-			compatible = "brcm,bcm3380-l2-intc";
-			reg = <0x10000024 0x4 0x1000002c 0x4>,
-			      <0x10000020 0x4 0x10000028 0x4>;
+		periph_intc: interrupt-controller@10000020 {
+			compatible = "brcm,bcm6345-l1-intc";
+			reg = <0x10000020 0x10>,
+			      <0x10000030 0x10>;
 
 			interrupt-controller;
 			#interrupt-cells = <1>;
 
 			interrupt-parent = <&cpu_intc>;
-			interrupts = <2>;
+			interrupts = <2>, <3>;
 		};
 
 		leds0: led-controller@100000d0 {
@@ -93,7 +92,16 @@
 			status = "disabled";
 		};
 
-		ehci0: usb@10001500 {
+		uart1: serial@10000120 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0x10000120 0x18>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <3>;
+			clocks = <&periph_clk>;
+			status = "disabled";
+		};
+
+		ehci: usb@10001500 {
 			compatible = "brcm,bcm6368-ehci", "generic-ehci";
 			reg = <0x10001500 0x100>;
 			big-endian;
@@ -102,7 +110,7 @@
 			status = "disabled";
 		};
 
-		ohci0: usb@10001600 {
+		ohci: usb@10001600 {
 			compatible = "brcm,bcm6368-ohci", "generic-ohci";
 			reg = <0x10001600 0x100>;
 			big-endian;
diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi
index 3ae1605..550e1d9 100644
--- a/arch/mips/boot/dts/brcm/bcm7125.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi
@@ -85,14 +85,15 @@
 			compatible = "brcm,bcm7120-l2-intc";
 			reg = <0x406780 0x8>;
 
-			brcm,int-map-mask = <0x44>;
+			brcm,int-map-mask = <0x44>, <0xf000000>;
 			brcm,int-fwd-mask = <0x70000>;
 
 			interrupt-controller;
 			#interrupt-cells = <1>;
 
 			interrupt-parent = <&periph_intc>;
-			interrupts = <18>;
+			interrupts = <18>, <19>;
+			interrupt-names = "upg_main", "upg_bsc";
 		};
 
 		sun_top_ctrl: syscon@404000 {
@@ -118,6 +119,70 @@
 			status = "disabled";
 		};
 
+		uart1: serial@406b40 {
+			compatible = "ns16550a";
+			reg = <0x406b40 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <64>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		uart2: serial@406b80 {
+			compatible = "ns16550a";
+			reg = <0x406b80 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <65>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		bsca: i2c@406200 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406200 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <24>;
+			interrupt-names = "upg_bsca";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscb: i2c@406280 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406280 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <25>;
+			interrupt-names = "upg_bscb";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscc: i2c@406300 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406300 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <26>;
+			interrupt-names = "upg_bscc";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscd: i2c@406380 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406380 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <27>;
+			interrupt-names = "upg_bscd";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
 		ehci0: usb@488300 {
 			compatible = "brcm,bcm7125-ehci", "generic-ehci";
 			reg = <0x488300 0x100>;
diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi
index be79919..ec95906 100644
--- a/arch/mips/boot/dts/brcm/bcm7346.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi
@@ -24,8 +24,6 @@
 
 	aliases {
 		uart0 = &uart0;
-		uart1 = &uart1;
-		uart2 = &uart2;
 	};
 
 	cpu_intc: cpu_intc {
@@ -323,8 +321,6 @@
 			interrupts = <40>;
 			#address-cells = <1>;
 			#size-cells = <0>;
-			brcm,broken-ncq;
-			brcm,broken-phy;
 			status = "disabled";
 
 			sata0: sata-port@0 {
@@ -338,7 +334,7 @@
 			};
 		};
 
-		sata_phy: sata-phy@1800000 {
+		sata_phy: sata-phy@180100 {
 			compatible = "brcm,bcm7425-sata-phy", "brcm,phy-sata3";
 			reg = <0x180100 0x0eff>;
 			reg-names = "phy";
diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi
index 060805b..ca57fb5 100644
--- a/arch/mips/boot/dts/brcm/bcm7358.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi
@@ -18,8 +18,6 @@
 
 	aliases {
 		uart0 = &uart0;
-		uart1 = &uart1;
-		uart2 = &uart2;
 	};
 
 	cpu_intc: cpu_intc {
diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi
index bcdb09b..1c0c3d4 100644
--- a/arch/mips/boot/dts/brcm/bcm7360.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi
@@ -18,8 +18,6 @@
 
 	aliases {
 		uart0 = &uart0;
-		uart1 = &uart1;
-		uart2 = &uart2;
 	};
 
 	cpu_intc: cpu_intc {
@@ -241,5 +239,45 @@
 			interrupts = <66>;
 			status = "disabled";
 		};
+
+		sata: sata@181000 {
+			compatible = "brcm,bcm7425-ahci", "brcm,sata3-ahci";
+			reg-names = "ahci", "top-ctrl";
+			reg = <0x181000 0xa9c>, <0x180020 0x1c>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <86>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+
+			sata0: sata-port@0 {
+				reg = <0>;
+				phys = <&sata_phy0>;
+			};
+
+			sata1: sata-port@1 {
+				reg = <1>;
+				phys = <&sata_phy1>;
+			};
+		};
+
+		sata_phy: sata-phy@180100 {
+			compatible = "brcm,bcm7425-sata-phy", "brcm,phy-sata3";
+			reg = <0x180100 0x0eff>;
+			reg-names = "phy";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+
+			sata_phy0: sata-phy@0 {
+				reg = <0>;
+				#phy-cells = <0>;
+			};
+
+			sata_phy1: sata-phy@1 {
+				reg = <1>;
+				#phy-cells = <0>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi
index d3b1b76..6b4713a 100644
--- a/arch/mips/boot/dts/brcm/bcm7362.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi
@@ -24,8 +24,6 @@
 
 	aliases {
 		uart0 = &uart0;
-		uart1 = &uart1;
-		uart2 = &uart2;
 	};
 
 	cpu_intc: cpu_intc {
@@ -246,8 +244,6 @@
 			interrupts = <86>;
 			#address-cells = <1>;
 			#size-cells = <0>;
-			brcm,broken-ncq;
-			brcm,broken-phy;
 			status = "disabled";
 
 			sata0: sata-port@0 {
@@ -261,7 +257,7 @@
 			};
 		};
 
-		sata_phy: sata-phy@1800000 {
+		sata_phy: sata-phy@180100 {
 			compatible = "brcm,bcm7425-sata-phy", "brcm,phy-sata3";
 			reg = <0x180100 0x0eff>;
 			reg-names = "phy";
diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi
index 3302a1b..0586bf6 100644
--- a/arch/mips/boot/dts/brcm/bcm7420.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi
@@ -86,14 +86,15 @@
 			compatible = "brcm,bcm7120-l2-intc";
 			reg = <0x406780 0x8>;
 
-			brcm,int-map-mask = <0x44>;
+			brcm,int-map-mask = <0x44>, <0x1f000000>;
 			brcm,int-fwd-mask = <0x70000>;
 
 			interrupt-controller;
 			#interrupt-cells = <1>;
 
 			interrupt-parent = <&periph_intc>;
-			interrupts = <18>;
+			interrupts = <18>, <19>;
+			interrupt-names = "upg_main", "upg_bsc";
 		};
 
 		sun_top_ctrl: syscon@404000 {
@@ -118,6 +119,78 @@
 			status = "disabled";
 		};
 
+		uart1: serial@406b40 {
+			compatible = "ns16550a";
+			reg = <0x406b40 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <64>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		uart2: serial@406b80 {
+			compatible = "ns16550a";
+			reg = <0x406b80 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <65>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		bsca: i2c@406200 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406200 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <24>;
+			interrupt-names = "upg_bsca";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscb: i2c@406280 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406280 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <25>;
+			interrupt-names = "upg_bscb";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscc: i2c@406300 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406300 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <26>;
+			interrupt-names = "upg_bscc";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscd: i2c@406380 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406380 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <27>;
+			interrupt-names = "upg_bscd";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bsce: i2c@406800 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406800 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <28>;
+			interrupt-names = "upg_bsce";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
 		enet0: ethernet@468000 {
 			phy-mode = "internal";
 			phy-handle = <&phy1>;
diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi
index 15b27aa..c1c15ed 100644
--- a/arch/mips/boot/dts/brcm/bcm7425.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi
@@ -87,14 +87,32 @@
 			compatible = "brcm,bcm7120-l2-intc";
 			reg = <0x406780 0x8>;
 
-			brcm,int-map-mask = <0x44>;
+			brcm,int-map-mask = <0x44>, <0x7000000>;
 			brcm,int-fwd-mask = <0x70000>;
 
 			interrupt-controller;
 			#interrupt-cells = <1>;
 
 			interrupt-parent = <&periph_intc>;
-			interrupts = <55>;
+			interrupts = <55>, <53>;
+			interrupt-names = "upg_main", "upg_bsc";
+		};
+
+		upg_aon_irq0_intc: upg_aon_irq0_intc@409480 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x409480 0x8>;
+
+			brcm,int-map-mask = <0x40>, <0x18000000>, <0x100000>;
+			brcm,int-fwd-mask = <0>;
+			brcm,irq-can-wake;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <56>, <54>, <59>;
+			interrupt-names = "upg_main_aon", "upg_bsc_aon",
+					  "upg_spi";
 		};
 
 		sun_top_ctrl: syscon@404000 {
@@ -119,6 +137,78 @@
 			status = "disabled";
 		};
 
+		uart1: serial@406b40 {
+			compatible = "ns16550a";
+			reg = <0x406b40 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <62>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		uart2: serial@406b80 {
+			compatible = "ns16550a";
+			reg = <0x406b80 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <63>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		bsca: i2c@409180 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x409180 0x58>;
+			interrupt-parent = <&upg_aon_irq0_intc>;
+			interrupts = <27>;
+			interrupt-names = "upg_bsca";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscb: i2c@409400 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x409400 0x58>;
+			interrupt-parent = <&upg_aon_irq0_intc>;
+			interrupts = <28>;
+			interrupt-names = "upg_bscb";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscc: i2c@406200 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406200 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <24>;
+			interrupt-names = "upg_bscc";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscd: i2c@406280 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406280 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <25>;
+			interrupt-names = "upg_bscd";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bsce: i2c@406300 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406300 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <26>;
+			interrupt-names = "upg_bsce";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
 		enet0: ethernet@b80000 {
 			phy-mode = "internal";
 			phy-handle = <&phy1>;
@@ -227,11 +317,9 @@
 			reg-names = "ahci", "top-ctrl";
 			reg = <0x181000 0xa9c>, <0x180020 0x1c>;
 			interrupt-parent = <&periph_intc>;
-			interrupts = <40>;
+			interrupts = <41>;
 			#address-cells = <1>;
 			#size-cells = <0>;
-			brcm,broken-ncq;
-			brcm,broken-phy;
 			status = "disabled";
 
 			sata0: sata-port@0 {
@@ -245,7 +333,7 @@
 			};
 		};
 
-		sata_phy: sata-phy@1800000 {
+		sata_phy: sata-phy@180100 {
 			compatible = "brcm,bcm7425-sata-phy", "brcm,phy-sata3";
 			reg = <0x180100 0x0eff>;
 			reg-names = "phy";
diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi
index 56035e5..a874d3a 100644
--- a/arch/mips/boot/dts/brcm/bcm7435.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi
@@ -7,7 +7,7 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		mips-hpt-frequency = <163125000>;
+		mips-hpt-frequency = <175625000>;
 
 		cpu@0 {
 			compatible = "brcm,bmips5200";
@@ -63,13 +63,14 @@
 
 		periph_intc: periph_intc@41b500 {
 			compatible = "brcm,bcm7038-l1-intc";
-			reg = <0x41b500 0x40>, <0x41b600 0x40>;
+			reg = <0x41b500 0x40>, <0x41b600 0x40>,
+				<0x41b700 0x40>, <0x41b800 0x40>;
 
 			interrupt-controller;
 			#interrupt-cells = <1>;
 
 			interrupt-parent = <&cpu_intc>;
-			interrupts = <2>, <3>;
+			interrupts = <2>, <3>, <2>, <3>;
 		};
 
 		sun_l2_intc: sun_l2_intc@403000 {
@@ -101,14 +102,32 @@
 			compatible = "brcm,bcm7120-l2-intc";
 			reg = <0x406780 0x8>;
 
-			brcm,int-map-mask = <0x44>;
+			brcm,int-map-mask = <0x44>, <0x7000000>;
 			brcm,int-fwd-mask = <0x70000>;
 
 			interrupt-controller;
 			#interrupt-cells = <1>;
 
 			interrupt-parent = <&periph_intc>;
-			interrupts = <60>;
+			interrupts = <60>, <58>;
+			interrupt-names = "upg_main", "upg_bsc";
+		};
+
+		upg_aon_irq0_intc: upg_aon_irq0_intc@409480 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x409480 0x8>;
+
+			brcm,int-map-mask = <0x40>, <0x18000000>, <0x100000>;
+			brcm,int-fwd-mask = <0>;
+			brcm,irq-can-wake;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <61>, <59>, <64>;
+			interrupt-names = "upg_main_aon", "upg_bsc_aon",
+					  "upg_spi";
 		};
 
 		sun_top_ctrl: syscon@404000 {
@@ -133,6 +152,78 @@
 			status = "disabled";
 		};
 
+		uart1: serial@406b40 {
+			compatible = "ns16550a";
+			reg = <0x406b40 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <67>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		uart2: serial@406b80 {
+			compatible = "ns16550a";
+			reg = <0x406b80 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <68>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		bsca: i2c@406300 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406300 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <26>;
+			interrupt-names = "upg_bsca";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscb: i2c@409400 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x409400 0x58>;
+			interrupt-parent = <&upg_aon_irq0_intc>;
+			interrupts = <28>;
+			interrupt-names = "upg_bscb";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscc: i2c@406200 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406200 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <24>;
+			interrupt-names = "upg_bscc";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bscd: i2c@406280 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x406280 0x58>;
+			interrupt-parent = <&upg_irq0_intc>;
+			interrupts = <25>;
+			interrupt-names = "upg_bscd";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
+		bsce: i2c@409180 {
+			compatible = "brcm,brcmstb-i2c";
+			reg = <0x409180 0x58>;
+			interrupt-parent = <&upg_aon_irq0_intc>;
+			interrupts = <27>;
+			interrupt-names = "upg_bsce";
+			clock-frequency = <390000>;
+			status = "disabled";
+		};
+
 		enet0: ethernet@b80000 {
 			phy-mode = "internal";
 			phy-handle = <&phy1>;
@@ -235,5 +326,45 @@
 			interrupts = <78>;
 			status = "disabled";
 		};
+
+		sata: sata@181000 {
+			compatible = "brcm,bcm7425-ahci", "brcm,sata3-ahci";
+			reg-names = "ahci", "top-ctrl";
+			reg = <0x181000 0xa9c>, <0x180020 0x1c>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <45>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+
+			sata0: sata-port@0 {
+				reg = <0>;
+				phys = <&sata_phy0>;
+			};
+
+			sata1: sata-port@1 {
+				reg = <1>;
+				phys = <&sata_phy1>;
+			};
+		};
+
+		sata_phy: sata-phy@180100 {
+			compatible = "brcm,bcm7425-sata-phy", "brcm,phy-sata3";
+			reg = <0x180100 0x0eff>;
+			reg-names = "phy";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+
+			sata_phy0: sata-phy@0 {
+				reg = <0>;
+				#phy-cells = <0>;
+			};
+
+			sata_phy1: sata-phy@1 {
+				reg = <1>;
+				#phy-cells = <0>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm96358nb4ser.dts b/arch/mips/boot/dts/brcm/bcm96358nb4ser.dts
new file mode 100644
index 0000000..f412117
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm96358nb4ser.dts
@@ -0,0 +1,46 @@
+/dts-v1/;
+
+/include/ "bcm6358.dtsi"
+
+/ {
+	compatible = "sfr,nb4-ser", "brcm,bcm6358";
+	model = "SFR Neufbox 4 (Sercomm)";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x02000000>;
+	};
+
+	chosen {
+		stdout-path = &uart0;
+	};
+};
+
+&leds0 {
+	status = "okay";
+
+	led@0 {
+		reg = <0>;
+		active-low;
+		label = "nb4-ser:white:alarm";
+	};
+	led@2 {
+		reg = <2>;
+		active-low;
+		label = "nb4-ser:white:tv";
+	};
+	led@3 {
+		reg = <3>;
+		active-low;
+		label = "nb4-ser:white:tel";
+	};
+	led@4 {
+		reg = <4>;
+		active-low;
+		label = "nb4-ser:white:adsl";
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm96368mvwg.dts b/arch/mips/boot/dts/brcm/bcm96368mvwg.dts
index 0e890c2..8c71c68 100644
--- a/arch/mips/boot/dts/brcm/bcm96368mvwg.dts
+++ b/arch/mips/boot/dts/brcm/bcm96368mvwg.dts
@@ -22,10 +22,10 @@
 };
 
 /* FIXME: need to set up USB_CTRL registers first */
-&ehci0 {
+&ehci {
 	status = "disabled";
 };
 
-&ohci0 {
+&ohci {
 	status = "disabled";
 };
diff --git a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
index e046b11..f2449d1 100644
--- a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
@@ -21,6 +21,30 @@
 	status = "okay";
 };
 
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&bsca {
+	status = "okay";
+};
+
+&bscb {
+	status = "okay";
+};
+
+&bscc {
+	status = "okay";
+};
+
+&bscd {
+	status = "okay";
+};
+
 /* FIXME: USB is wonky; disable it for now */
 &ehci0 {
 	status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97360svmb.dts b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
index d48462e..73124be 100644
--- a/arch/mips/boot/dts/brcm/bcm97360svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
@@ -56,3 +56,11 @@
 &ohci0 {
 	status = "okay";
 };
+
+&sata {
+	status = "okay";
+};
+
+&sata_phy {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97420c.dts b/arch/mips/boot/dts/brcm/bcm97420c.dts
index 67fe1f3..600d57a 100644
--- a/arch/mips/boot/dts/brcm/bcm97420c.dts
+++ b/arch/mips/boot/dts/brcm/bcm97420c.dts
@@ -23,6 +23,34 @@
 	status = "okay";
 };
 
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&bsca {
+	status = "okay";
+};
+
+&bscb {
+	status = "okay";
+};
+
+&bscc {
+	status = "okay";
+};
+
+&bscd {
+	status = "okay";
+};
+
+&bsce {
+	status = "okay";
+};
+
 /* FIXME: MAC driver comes up but cannot attach to PHY */
 &enet0 {
 	status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97425svmb.dts b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
index 689c68a..119c714 100644
--- a/arch/mips/boot/dts/brcm/bcm97425svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
@@ -23,6 +23,34 @@
 	status = "okay";
 };
 
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&bsca {
+	status = "okay";
+};
+
+&bscb {
+	status = "okay";
+};
+
+&bscc {
+	status = "okay";
+};
+
+&bscd {
+	status = "okay";
+};
+
+&bsce {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
diff --git a/arch/mips/boot/dts/brcm/bcm97435svmb.dts b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
index 1df0881..43e3ba2 100644
--- a/arch/mips/boot/dts/brcm/bcm97435svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
@@ -14,7 +14,7 @@
 	};
 
 	chosen {
-		bootargs = "console=ttyS0,115200 maxcpus=1";
+		bootargs = "console=ttyS0,115200";
 		stdout-path = &uart0;
 	};
 };
@@ -23,6 +23,34 @@
 	status = "okay";
 };
 
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&bsca {
+	status = "okay";
+};
+
+&bscb {
+	status = "okay";
+};
+
+&bscc {
+	status = "okay";
+};
+
+&bscd {
+	status = "okay";
+};
+
+&bsce {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -58,3 +86,11 @@
 &ohci3 {
 	status = "okay";
 };
+
+&sata {
+	status = "okay";
+};
+
+&sata_phy {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
new file mode 100644
index 0000000..d6bc994
--- /dev/null
+++ b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
@@ -0,0 +1,78 @@
+/*
+ * Device tree source for D-Link DSR-1000N.
+ *
+ * Written by: Aaro Koskinen <aaro.koskinen@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/include/ "octeon_3xxx.dtsi"
+
+/ {
+	model = "dlink,dsr-1000n";
+
+	soc@0 {
+		smi0: mdio@1180000001800 {
+			phy8: ethernet-phy@8 {
+				reg = <8>;
+				compatible = "ethernet-phy-ieee802.3-c22";
+			};
+		};
+
+		pip: pip@11800a0000000 {
+			interface@0 {
+				ethernet@0 {
+					fixed-link {
+						speed = <1000>;
+						full-duplex;
+					};
+				};
+				ethernet@1 {
+					fixed-link {
+						speed = <1000>;
+						full-duplex;
+					};
+				};
+				ethernet@2 {
+					phy-handle = <&phy8>;
+				};
+			};
+		};
+
+		twsi0: i2c@1180000001000 {
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		uart0: serial@1180000000800 {
+			clock-frequency = <500000000>;
+		};
+
+		usbn: usbn@1180068000000 {
+			refclk-frequency = <12000000>;
+			refclk-type = "crystal";
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		usb1 {
+			label = "usb1";
+			gpios = <&gpio 9 1>; /* Active low */
+		};
+
+		usb2 {
+			label = "usb2";
+			gpios = <&gpio 10 1>; /* Active low */
+		};
+	};
+
+	aliases {
+		pip = &pip;
+	};
+};
diff --git a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
index 9c48e05..de61f02 100644
--- a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
+++ b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
@@ -1,4 +1,3 @@
-/dts-v1/;
 /*
  * OCTEON 3XXX, 5XXX, 63XX device tree skeleton.
  *
@@ -6,56 +5,12 @@
  * use.	 Because of this, it contains a super-set of the available
  * devices and properties.
  */
+
+/include/ "octeon_3xxx.dtsi"
+
 / {
-	compatible = "cavium,octeon-3860";
-	#address-cells = <2>;
-	#size-cells = <2>;
-	interrupt-parent = <&ciu>;
-
 	soc@0 {
-		compatible = "simple-bus";
-		#address-cells = <2>;
-		#size-cells = <2>;
-		ranges; /* Direct mapping */
-
-		ciu: interrupt-controller@1070000000000 {
-			compatible = "cavium,octeon-3860-ciu";
-			interrupt-controller;
-			/* Interrupts are specified by two parts:
-			 * 1) Controller register (0 or 1)
-			 * 2) Bit within the register (0..63)
-			 */
-			#interrupt-cells = <2>;
-			reg = <0x10700 0x00000000 0x0 0x7000>;
-		};
-
-		gpio: gpio-controller@1070000000800 {
-			#gpio-cells = <2>;
-			compatible = "cavium,octeon-3860-gpio";
-			reg = <0x10700 0x00000800 0x0 0x100>;
-			gpio-controller;
-			/* Interrupts are specified by two parts:
-			 * 1) GPIO pin number (0..15)
-			 * 2) Triggering (1 - edge rising
-			 *		  2 - edge falling
-			 *		  4 - level active high
-			 *		  8 - level active low)
-			 */
-			interrupt-controller;
-			#interrupt-cells = <2>;
-			/* The GPIO pin connect to 16 consecutive CUI bits */
-			interrupts = <0 16>, <0 17>, <0 18>, <0 19>,
-				     <0 20>, <0 21>, <0 22>, <0 23>,
-				     <0 24>, <0 25>, <0 26>, <0 27>,
-				     <0 28>, <0 29>, <0 30>, <0 31>;
-		};
-
 		smi0: mdio@1180000001800 {
-			compatible = "cavium,octeon-3860-mdio";
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <0x11800 0x00001800 0x0 0x40>;
-
 			phy0: ethernet-phy@0 {
 				compatible = "marvell,88e1118";
 				marvell,reg-init =
@@ -220,35 +175,16 @@
 		};
 
 		pip: pip@11800a0000000 {
-			compatible = "cavium,octeon-3860-pip";
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <0x11800 0xa0000000 0x0 0x2000>;
-
 			interface@0 {
-				compatible = "cavium,octeon-3860-pip-interface";
-				#address-cells = <1>;
-				#size-cells = <0>;
-				reg = <0>; /* interface */
-
 				ethernet@0 {
-					compatible = "cavium,octeon-3860-pip-port";
-					reg = <0x0>; /* Port */
-					local-mac-address = [ 00 00 00 00 00 00 ];
 					phy-handle = <&phy2>;
 					cavium,alt-phy-handle = <&phy100>;
 				};
 				ethernet@1 {
-					compatible = "cavium,octeon-3860-pip-port";
-					reg = <0x1>; /* Port */
-					local-mac-address = [ 00 00 00 00 00 00 ];
 					phy-handle = <&phy3>;
 					cavium,alt-phy-handle = <&phy101>;
 				};
 				ethernet@2 {
-					compatible = "cavium,octeon-3860-pip-port";
-					reg = <0x2>; /* Port */
-					local-mac-address = [ 00 00 00 00 00 00 ];
 					phy-handle = <&phy4>;
 					cavium,alt-phy-handle = <&phy102>;
 				};
@@ -322,11 +258,6 @@
 			};
 
 			interface@1 {
-				compatible = "cavium,octeon-3860-pip-interface";
-				#address-cells = <1>;
-				#size-cells = <0>;
-				reg = <1>; /* interface */
-
 				ethernet@0 {
 					compatible = "cavium,octeon-3860-pip-port";
 					reg = <0x0>; /* Port */
@@ -355,13 +286,6 @@
 		};
 
 		twsi0: i2c@1180000001000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "cavium,octeon-3860-twsi";
-			reg = <0x11800 0x00001000 0x0 0x200>;
-			interrupts = <0 45>;
-			clock-frequency = <100000>;
-
 			rtc@68 {
 				compatible = "dallas,ds1337";
 				reg = <0x68>;
@@ -381,15 +305,6 @@
 			clock-frequency = <100000>;
 		};
 
-		uart0: serial@1180000000800 {
-			compatible = "cavium,octeon-3860-uart","ns16550";
-			reg = <0x11800 0x00000800 0x0 0x400>;
-			clock-frequency = <0>;
-			current-speed = <115200>;
-			reg-shift = <3>;
-			interrupts = <0 34>;
-		};
-
 		uart1: serial@1180000000c00 {
 			compatible = "cavium,octeon-3860-uart","ns16550";
 			reg = <0x11800 0x00000c00 0x0 0x400>;
@@ -409,98 +324,6 @@
 		};
 
 		bootbus: bootbus@1180000000000 {
-			compatible = "cavium,octeon-3860-bootbus";
-			reg = <0x11800 0x00000000 0x0 0x200>;
-			/* The chip select number and offset */
-			#address-cells = <2>;
-			/* The size of the chip select region */
-			#size-cells = <1>;
-			ranges = <0 0  0x0 0x1f400000  0xc00000>,
-				 <1 0  0x10000 0x30000000  0>,
-				 <2 0  0x10000 0x40000000  0>,
-				 <3 0  0x10000 0x50000000  0>,
-				 <4 0  0x0 0x1d020000  0x10000>,
-				 <5 0  0x0 0x1d040000  0x10000>,
-				 <6 0  0x0 0x1d050000  0x10000>,
-				 <7 0  0x10000 0x90000000  0>;
-
-			cavium,cs-config@0 {
-				compatible = "cavium,octeon-3860-bootbus-config";
-				cavium,cs-index = <0>;
-				cavium,t-adr  = <20>;
-				cavium,t-ce   = <60>;
-				cavium,t-oe   = <60>;
-				cavium,t-we   = <45>;
-				cavium,t-rd-hld = <35>;
-				cavium,t-wr-hld = <45>;
-				cavium,t-pause	= <0>;
-				cavium,t-wait	= <0>;
-				cavium,t-page	= <35>;
-				cavium,t-rd-dly = <0>;
-
-				cavium,pages	 = <0>;
-				cavium,bus-width = <8>;
-			};
-			cavium,cs-config@4 {
-				compatible = "cavium,octeon-3860-bootbus-config";
-				cavium,cs-index = <4>;
-				cavium,t-adr  = <320>;
-				cavium,t-ce   = <320>;
-				cavium,t-oe   = <320>;
-				cavium,t-we   = <320>;
-				cavium,t-rd-hld = <320>;
-				cavium,t-wr-hld = <320>;
-				cavium,t-pause	= <320>;
-				cavium,t-wait	= <320>;
-				cavium,t-page	= <320>;
-				cavium,t-rd-dly = <0>;
-
-				cavium,pages	 = <0>;
-				cavium,bus-width = <8>;
-			};
-			cavium,cs-config@5 {
-				compatible = "cavium,octeon-3860-bootbus-config";
-				cavium,cs-index = <5>;
-				cavium,t-adr  = <5>;
-				cavium,t-ce   = <300>;
-				cavium,t-oe   = <125>;
-				cavium,t-we   = <150>;
-				cavium,t-rd-hld = <100>;
-				cavium,t-wr-hld = <30>;
-				cavium,t-pause	= <0>;
-				cavium,t-wait	= <30>;
-				cavium,t-page	= <320>;
-				cavium,t-rd-dly = <0>;
-
-				cavium,pages	 = <0>;
-				cavium,bus-width = <16>;
-			};
-			cavium,cs-config@6 {
-				compatible = "cavium,octeon-3860-bootbus-config";
-				cavium,cs-index = <6>;
-				cavium,t-adr  = <5>;
-				cavium,t-ce   = <300>;
-				cavium,t-oe   = <270>;
-				cavium,t-we   = <150>;
-				cavium,t-rd-hld = <100>;
-				cavium,t-wr-hld = <70>;
-				cavium,t-pause	= <0>;
-				cavium,t-wait	= <0>;
-				cavium,t-page	= <320>;
-				cavium,t-rd-dly = <0>;
-
-				cavium,pages	 = <0>;
-				cavium,wait-mode;
-				cavium,bus-width = <16>;
-			};
-
-			flash0: nor@0,0 {
-				compatible = "cfi-flash";
-				reg = <0 0 0x800000>;
-				#address-cells = <1>;
-				#size-cells = <1>;
-			};
-
 			led0: led-display@4,0 {
 				compatible = "avago,hdsp-253x";
 				reg = <4 0x20 0x20>, <4 0 0x20>;
@@ -515,17 +338,6 @@
 			};
 		};
 
-		dma0: dma-engine@1180000000100 {
-			compatible = "cavium,octeon-5750-bootbus-dma";
-			reg = <0x11800 0x00000100 0x0 0x8>;
-			interrupts = <0 63>;
-		};
-		dma1: dma-engine@1180000000108 {
-			compatible = "cavium,octeon-5750-bootbus-dma";
-			reg = <0x11800 0x00000108 0x0 0x8>;
-			interrupts = <0 63>;
-		};
-
 		uctl: uctl@118006f000000 {
 			compatible = "cavium,octeon-6335-uctl";
 			reg = <0x11800 0x6f000000 0x0 0x100>;
@@ -552,21 +364,10 @@
 		};
 
 		usbn: usbn@1180068000000 {
-			compatible = "cavium,octeon-5750-usbn";
-			reg = <0x11800 0x68000000 0x0 0x1000>;
-			ranges; /* Direct mapping */
-			#address-cells = <2>;
-			#size-cells = <2>;
 			/* 12MHz, 24MHz and 48MHz allowed */
 			refclk-frequency = <12000000>;
 			/* Either "crystal" or "external" */
 			refclk-type = "crystal";
-
-			usbc@16f0010000000 {
-				compatible = "cavium,octeon-5750-usbc";
-				reg = <0x16f00 0x10000000 0x0 0x80000>;
-				interrupts = <0 56>;
-			};
 		};
 	};
 
diff --git a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dtsi b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dtsi
new file mode 100644
index 0000000..5302148
--- /dev/null
+++ b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dtsi
@@ -0,0 +1,231 @@
+/* OCTEON 3XXX DTS common parts. */
+
+/dts-v1/;
+
+/ {
+	compatible = "cavium,octeon-3860";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&ciu>;
+
+	soc@0 {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges; /* Direct mapping */
+
+		ciu: interrupt-controller@1070000000000 {
+			compatible = "cavium,octeon-3860-ciu";
+			interrupt-controller;
+			/* Interrupts are specified by two parts:
+			 * 1) Controller register (0 or 1)
+			 * 2) Bit within the register (0..63)
+			 */
+			#interrupt-cells = <2>;
+			reg = <0x10700 0x00000000 0x0 0x7000>;
+		};
+
+		gpio: gpio-controller@1070000000800 {
+			#gpio-cells = <2>;
+			compatible = "cavium,octeon-3860-gpio";
+			reg = <0x10700 0x00000800 0x0 0x100>;
+			gpio-controller;
+			/* Interrupts are specified by two parts:
+			 * 1) GPIO pin number (0..15)
+			 * 2) Triggering (1 - edge rising
+			 *		  2 - edge falling
+			 *		  4 - level active high
+			 *		  8 - level active low)
+			 */
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			/* The GPIO pins connect to 16 consecutive CIU bits */
+			interrupts = <0 16>, <0 17>, <0 18>, <0 19>,
+				     <0 20>, <0 21>, <0 22>, <0 23>,
+				     <0 24>, <0 25>, <0 26>, <0 27>,
+				     <0 28>, <0 29>, <0 30>, <0 31>;
+		};
+
+		smi0: mdio@1180000001800 {
+			compatible = "cavium,octeon-3860-mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x11800 0x00001800 0x0 0x40>;
+		};
+
+		pip: pip@11800a0000000 {
+			compatible = "cavium,octeon-3860-pip";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x11800 0xa0000000 0x0 0x2000>;
+
+			interface@0 {
+				compatible = "cavium,octeon-3860-pip-interface";
+				#address-cells = <1>;
+				#size-cells = <0>;
+				reg = <0>; /* interface */
+
+				ethernet@0 {
+					compatible = "cavium,octeon-3860-pip-port";
+					reg = <0x0>; /* Port */
+					local-mac-address = [ 00 00 00 00 00 00 ];
+				};
+				ethernet@1 {
+					compatible = "cavium,octeon-3860-pip-port";
+					reg = <0x1>; /* Port */
+					local-mac-address = [ 00 00 00 00 00 00 ];
+				};
+				ethernet@2 {
+					compatible = "cavium,octeon-3860-pip-port";
+					reg = <0x2>; /* Port */
+					local-mac-address = [ 00 00 00 00 00 00 ];
+				};
+			};
+
+			interface@1 {
+				compatible = "cavium,octeon-3860-pip-interface";
+				#address-cells = <1>;
+				#size-cells = <0>;
+				reg = <1>; /* interface */
+			};
+		};
+
+		twsi0: i2c@1180000001000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "cavium,octeon-3860-twsi";
+			reg = <0x11800 0x00001000 0x0 0x200>;
+			interrupts = <0 45>;
+			clock-frequency = <100000>;
+		};
+
+		uart0: serial@1180000000800 {
+			compatible = "cavium,octeon-3860-uart","ns16550";
+			reg = <0x11800 0x00000800 0x0 0x400>;
+			clock-frequency = <0>;
+			current-speed = <115200>;
+			reg-shift = <3>;
+			interrupts = <0 34>;
+		};
+
+		bootbus: bootbus@1180000000000 {
+			compatible = "cavium,octeon-3860-bootbus";
+			reg = <0x11800 0x00000000 0x0 0x200>;
+			/* The chip select number and offset */
+			#address-cells = <2>;
+			/* The size of the chip select region */
+			#size-cells = <1>;
+			ranges = <0 0  0x0 0x1f400000  0xc00000>,
+				 <1 0  0x10000 0x30000000  0>,
+				 <2 0  0x10000 0x40000000  0>,
+				 <3 0  0x10000 0x50000000  0>,
+				 <4 0  0x0 0x1d020000  0x10000>,
+				 <5 0  0x0 0x1d040000  0x10000>,
+				 <6 0  0x0 0x1d050000  0x10000>,
+				 <7 0  0x10000 0x90000000  0>;
+
+			cavium,cs-config@0 {
+				compatible = "cavium,octeon-3860-bootbus-config";
+				cavium,cs-index = <0>;
+				cavium,t-adr  = <20>;
+				cavium,t-ce   = <60>;
+				cavium,t-oe   = <60>;
+				cavium,t-we   = <45>;
+				cavium,t-rd-hld = <35>;
+				cavium,t-wr-hld = <45>;
+				cavium,t-pause	= <0>;
+				cavium,t-wait	= <0>;
+				cavium,t-page	= <35>;
+				cavium,t-rd-dly = <0>;
+
+				cavium,pages	 = <0>;
+				cavium,bus-width = <8>;
+			};
+			cavium,cs-config@4 {
+				compatible = "cavium,octeon-3860-bootbus-config";
+				cavium,cs-index = <4>;
+				cavium,t-adr  = <320>;
+				cavium,t-ce   = <320>;
+				cavium,t-oe   = <320>;
+				cavium,t-we   = <320>;
+				cavium,t-rd-hld = <320>;
+				cavium,t-wr-hld = <320>;
+				cavium,t-pause	= <320>;
+				cavium,t-wait	= <320>;
+				cavium,t-page	= <320>;
+				cavium,t-rd-dly = <0>;
+
+				cavium,pages	 = <0>;
+				cavium,bus-width = <8>;
+			};
+			cavium,cs-config@5 {
+				compatible = "cavium,octeon-3860-bootbus-config";
+				cavium,cs-index = <5>;
+				cavium,t-adr  = <5>;
+				cavium,t-ce   = <300>;
+				cavium,t-oe   = <125>;
+				cavium,t-we   = <150>;
+				cavium,t-rd-hld = <100>;
+				cavium,t-wr-hld = <30>;
+				cavium,t-pause	= <0>;
+				cavium,t-wait	= <30>;
+				cavium,t-page	= <320>;
+				cavium,t-rd-dly = <0>;
+
+				cavium,pages	 = <0>;
+				cavium,bus-width = <16>;
+			};
+			cavium,cs-config@6 {
+				compatible = "cavium,octeon-3860-bootbus-config";
+				cavium,cs-index = <6>;
+				cavium,t-adr  = <5>;
+				cavium,t-ce   = <300>;
+				cavium,t-oe   = <270>;
+				cavium,t-we   = <150>;
+				cavium,t-rd-hld = <100>;
+				cavium,t-wr-hld = <70>;
+				cavium,t-pause	= <0>;
+				cavium,t-wait	= <0>;
+				cavium,t-page	= <320>;
+				cavium,t-rd-dly = <0>;
+
+				cavium,pages	 = <0>;
+				cavium,wait-mode;
+				cavium,bus-width = <16>;
+			};
+
+			flash0: nor@0,0 {
+				compatible = "cfi-flash";
+				reg = <0 0 0x800000>;
+				#address-cells = <1>;
+				#size-cells = <1>;
+			};
+		};
+
+		dma0: dma-engine@1180000000100 {
+			compatible = "cavium,octeon-5750-bootbus-dma";
+			reg = <0x11800 0x00000100 0x0 0x8>;
+			interrupts = <0 63>;
+		};
+
+		dma1: dma-engine@1180000000108 {
+			compatible = "cavium,octeon-5750-bootbus-dma";
+			reg = <0x11800 0x00000108 0x0 0x8>;
+			interrupts = <0 63>;
+		};
+
+		usbn: usbn@1180068000000 {
+			compatible = "cavium,octeon-5750-usbn";
+			reg = <0x11800 0x68000000 0x0 0x1000>;
+			ranges; /* Direct mapping */
+			#address-cells = <2>;
+			#size-cells = <2>;
+
+			usbc@16f0010000000 {
+				compatible = "cavium,octeon-5750-usbc";
+				reg = <0x16f00 0x10000000 0x0 0x80000>;
+				interrupts = <0 56>;
+			};
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/cavium-octeon/ubnt_e100.dts b/arch/mips/boot/dts/cavium-octeon/ubnt_e100.dts
new file mode 100644
index 0000000..243e5dc
--- /dev/null
+++ b/arch/mips/boot/dts/cavium-octeon/ubnt_e100.dts
@@ -0,0 +1,59 @@
+/*
+ * Device tree source for EdgeRouter Lite.
+ *
+ * Written by: Aaro Koskinen <aaro.koskinen@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/include/ "octeon_3xxx.dtsi"
+
+/ {
+	model = "ubnt,e100";
+
+	soc@0 {
+		smi0: mdio@1180000001800 {
+			phy5: ethernet-phy@5 {
+				reg = <5>;
+				compatible = "ethernet-phy-ieee802.3-c22";
+			};
+			phy6: ethernet-phy@6 {
+				reg = <6>;
+				compatible = "ethernet-phy-ieee802.3-c22";
+			};
+			phy7: ethernet-phy@7 {
+				reg = <7>;
+				compatible = "ethernet-phy-ieee802.3-c22";
+			};
+		};
+
+		pip: pip@11800a0000000 {
+			interface@0 {
+				ethernet@0 {
+					phy-handle = <&phy7>;
+				};
+				ethernet@1 {
+					phy-handle = <&phy6>;
+				};
+				ethernet@2 {
+					phy-handle = <&phy5>;
+				};
+			};
+		};
+
+		uart0: serial@1180000000800 {
+			clock-frequency = <500000000>;
+		};
+
+		usbn: usbn@1180068000000 {
+			refclk-frequency = <12000000>;
+			refclk-type = "crystal";
+		};
+	};
+
+	aliases {
+		pip = &pip;
+	};
+};
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index 8b2437c..4a9c8f2 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -65,4 +65,18 @@
 		clocks = <&ext>, <&cgu JZ4740_CLK_UART1>;
 		clock-names = "baud", "module";
 	};
+
+	uhc: uhc@13030000 {
+		compatible = "ingenic,jz4740-ohci", "generic-ohci";
+		reg = <0x13030000 0x1000>;
+
+		clocks = <&cgu JZ4740_CLK_UHC>;
+		assigned-clocks = <&cgu JZ4740_CLK_UHC>;
+		assigned-clock-rates = <48000000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <3>;
+
+		status = "disabled";
+	};
 };
diff --git a/arch/mips/boot/dts/lantiq/easy50712.dts b/arch/mips/boot/dts/lantiq/easy50712.dts
index 143b8a3..b599625 100644
--- a/arch/mips/boot/dts/lantiq/easy50712.dts
+++ b/arch/mips/boot/dts/lantiq/easy50712.dts
@@ -52,7 +52,7 @@
 		};
 
 		gpio: pinmux@E100B10 {
-			compatible = "lantiq,pinctrl-xway";
+			compatible = "lantiq,danube-pinctrl";
 			pinctrl-names = "default";
 			pinctrl-0 = <&state_default>;
 
diff --git a/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi b/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi
deleted file mode 100644
index ef13350..0000000
--- a/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Device Tree Source for PIC32MZDA clock data
- *
- * Purna Chandra Mandal <purna.mandal@microchip.com>
- * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
- *
- * Licensed under GPLv2 or later.
- */
-
-/* all fixed rate clocks */
-
-/ {
-	POSC:posc_clk { /* On-chip primary oscillator */
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <24000000>;
-	};
-
-	FRC:frc_clk { /* internal FRC oscillator */
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <8000000>;
-	};
-
-	BFRC:bfrc_clk { /* internal backup FRC oscillator */
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <8000000>;
-	};
-
-	LPRC:lprc_clk { /* internal low-power FRC oscillator */
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <32000>;
-	};
-
-	/* UPLL provides clock to USBCORE */
-	UPLL:usb_phy_clk {
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <24000000>;
-		clock-output-names = "usbphy_clk";
-	};
-
-	TxCKI:txcki_clk { /* external clock input on TxCLKI pin */
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <4000000>;
-		status = "disabled";
-	};
-
-	/* external clock input on REFCLKIx pin */
-	REFIx:refix_clk {
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <24000000>;
-		status = "disabled";
-	};
-
-	/* PIC32 specific clks */
-	pic32_clktree {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		reg = <0x1f801200 0x200>;
-		compatible = "microchip,pic32mzda-clk";
-		ranges = <0 0x1f801200 0x200>;
-
-		/* secondary oscillator; external input on SOSCI pin */
-		SOSC:sosc_clk@0 {
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-sosc";
-			clock-frequency = <32768>;
-			reg = <0x000 0x10>,   /* enable reg */
-			      <0x1d0 0x10>; /* status reg */
-			microchip,bit-mask = <0x02>; /* enable mask */
-			microchip,status-bit-mask = <0x10>; /* status-mask*/
-		};
-
-		FRCDIV:frcdiv_clk {
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-frcdivclk";
-			clocks = <&FRC>;
-			clock-output-names = "frcdiv_clk";
-		};
-
-		/* System PLL clock */
-		SYSPLL:spll_clk@020 {
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-syspll";
-			reg = <0x020 0x10>, /* SPLL register */
-			      <0x1d0 0x10>; /* CLKSTAT register */
-			clocks = <&POSC>, <&FRC>;
-			clock-output-names = "sys_pll";
-			microchip,status-bit-mask = <0x80>; /* SPLLRDY */
-		};
-
-		/* system clock; mux with postdiv & slew */
-		SYSCLK:sys_clk@1c0 {
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-sysclk-v2";
-			reg = <0x1c0 0x04>; /* SLEWCON */
-			clocks = <&FRCDIV>, <&SYSPLL>, <&POSC>, <&SOSC>,
-				 <&LPRC>, <&FRCDIV>;
-			microchip,clock-indices = <0>, <1>, <2>, <4>,
-						  <5>, <7>;
-			clock-output-names = "sys_clk";
-		};
-
-		/* Peripheral bus1 clock */
-		PBCLK1:pb1_clk@140 {
-			reg = <0x140 0x10>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-pbclk";
-			clocks = <&SYSCLK>;
-			clock-output-names = "pb1_clk";
-			/* used by system modules, not gateable */
-			microchip,ignore-unused;
-		};
-
-		/* Peripheral bus2 clock */
-		PBCLK2:pb2_clk@150 {
-			reg = <0x150 0x10>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-pbclk";
-			clocks = <&SYSCLK>;
-			clock-output-names = "pb2_clk";
-			/* avoid gating even if unused */
-			microchip,ignore-unused;
-		};
-
-		/* Peripheral bus3 clock */
-		PBCLK3:pb3_clk@160 {
-			reg = <0x160 0x10>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-pbclk";
-			clocks = <&SYSCLK>;
-			clock-output-names = "pb3_clk";
-		};
-
-		/* Peripheral bus4 clock(I/O ports, GPIO) */
-		PBCLK4:pb4_clk@170 {
-			reg = <0x170 0x10>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-pbclk";
-			clocks = <&SYSCLK>;
-			clock-output-names = "pb4_clk";
-		};
-
-		/* Peripheral bus clock */
-		PBCLK5:pb5_clk@180 {
-			reg = <0x180 0x10>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-pbclk";
-			clocks = <&SYSCLK>;
-			clock-output-names = "pb5_clk";
-		};
-
-		/* Peripheral Bus6 clock; */
-		PBCLK6:pb6_clk@190 {
-			reg = <0x190 0x10>;
-			compatible = "microchip,pic32mzda-pbclk";
-			clocks = <&SYSCLK>;
-			#clock-cells = <0>;
-		};
-
-		/* Peripheral bus7 clock */
-		PBCLK7:pb7_clk@1a0 {
-			reg = <0x1a0 0x10>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-pbclk";
-			/* CPU is driven by this clock; so named */
-			clock-output-names = "cpu_clk";
-			clocks = <&SYSCLK>;
-		};
-
-		/* Reference Oscillator clock for SPI/I2S */
-		REFCLKO1:refo1_clk@80 {
-			reg = <0x080 0x20>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-refoclk";
-			clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
-				 <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
-			microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
-						  <5>, <7>, <8>, <9>;
-			clock-output-names = "refo1_clk";
-		};
-
-		/* Reference Oscillator clock for SQI */
-		REFCLKO2:refo2_clk@a0 {
-			reg = <0x0a0 0x20>;
-			#clock-cells = <0>;
-			compatible = "microchip,pic32mzda-refoclk";
-			clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
-				 <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
-			microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
-						  <5>, <7>, <8>, <9>;
-			clock-output-names = "refo2_clk";
-		};
-
-		/* Reference Oscillator clock, ADC */
-		REFCLKO3:refo3_clk@c0 {
-			reg = <0x0c0 0x20>;
-			compatible = "microchip,pic32mzda-refoclk";
-			clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
-				 <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
-			microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
-						  <5>, <7>, <8>, <9>;
-			#clock-cells = <0>;
-			clock-output-names = "refo3_clk";
-		};
-
-		/* Reference Oscillator clock */
-		REFCLKO4:refo4_clk@e0 {
-			reg = <0x0e0 0x20>;
-			compatible = "microchip,pic32mzda-refoclk";
-			clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
-				 <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
-			microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
-						  <5>, <7>, <8>, <9>;
-			#clock-cells = <0>;
-			clock-output-names = "refo4_clk";
-		};
-
-		/* Reference Oscillator clock, LCD */
-		REFCLKO5:refo5_clk@100 {
-			reg = <0x100 0x20>;
-			compatible = "microchip,pic32mzda-refoclk";
-			clocks = <&SYSCLK>,<&PBCLK1>,<&POSC>,<&FRC>,<&LPRC>,
-				 <&SOSC>,<&SYSPLL>,<&REFIx>,<&BFRC>;
-			microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
-						  <5>, <7>, <8>, <9>;
-			#clock-cells = <0>;
-			clock-output-names = "refo5_clk";
-		};
-	};
-};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda.dtsi b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
index ad9e3318..5353a63 100644
--- a/arch/mips/boot/dts/pic32/pic32mzda.dtsi
+++ b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
@@ -6,11 +6,9 @@
  * published by the Free Software Foundation.
  *
  */
-
+#include <dt-bindings/clock/microchip,pic32-clock.h>
 #include <dt-bindings/interrupt-controller/irq.h>
 
-#include "pic32mzda-clk.dtsi"
-
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -50,6 +48,29 @@
 		interrupts = <0 IRQ_TYPE_EDGE_RISING>;
 	};
 
+	/* external clock input on TxCLKI pin */
+	txcki: txcki_clk {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <4000000>;
+		status = "disabled";
+	};
+
+	/* external input on REFCLKIx pin */
+	refix: refix_clk {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <24000000>;
+		status = "disabled";
+	};
+
+	rootclk: clock-controller@1f801200 {
+		compatible = "microchip,pic32mzda-clk";
+		reg = <0x1f801200 0x200>;
+		#clock-cells = <1>;
+		microchip,pic32mzda-sosc;
+	};
+
 	evic: interrupt-controller@1f810000 {
 		compatible = "microchip,pic32mzda-evic";
 		interrupt-controller;
@@ -63,7 +84,7 @@
 		#size-cells = <1>;
 		compatible = "microchip,pic32mzda-pinctrl";
 		reg = <0x1f801400 0x400>;
-		clocks = <&PBCLK1>;
+		clocks = <&rootclk PB1CLK>;
 	};
 
 	/* PORTA */
@@ -75,7 +96,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <0>;
 		gpio-ranges = <&pic32_pinctrl 0 0 16>;
 	};
@@ -89,7 +110,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <1>;
 		gpio-ranges = <&pic32_pinctrl 0 16 16>;
 	};
@@ -103,7 +124,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <2>;
 		gpio-ranges = <&pic32_pinctrl 0 32 16>;
 	};
@@ -117,7 +138,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <3>;
 		gpio-ranges = <&pic32_pinctrl 0 48 16>;
 	};
@@ -131,7 +152,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <4>;
 		gpio-ranges = <&pic32_pinctrl 0 64 16>;
 	};
@@ -145,7 +166,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <5>;
 		gpio-ranges = <&pic32_pinctrl 0 80 16>;
 	};
@@ -159,7 +180,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <6>;
 		gpio-ranges = <&pic32_pinctrl 0 96 16>;
 	};
@@ -173,7 +194,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <7>;
 		gpio-ranges = <&pic32_pinctrl 0 112 16>;
 	};
@@ -189,7 +210,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <8>;
 		gpio-ranges = <&pic32_pinctrl 0 128 16>;
 	};
@@ -203,7 +224,7 @@
 		gpio-controller;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		clocks = <&PBCLK4>;
+		clocks = <&rootclk PB4CLK>;
 		microchip,gpio-bank = <9>;
 		gpio-ranges = <&pic32_pinctrl 0 144 16>;
 	};
@@ -212,7 +233,7 @@
 		compatible = "microchip,pic32mzda-sdhci";
 		reg = <0x1f8ec000 0x100>;
 		interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&REFCLKO4>, <&PBCLK5>;
+		clocks = <&rootclk REF4CLK>, <&rootclk PB5CLK>;
 		clock-names = "base_clk", "sys_clk";
 		bus-width = <4>;
 		cap-sd-highspeed;
@@ -225,7 +246,7 @@
 		interrupts = <112 IRQ_TYPE_LEVEL_HIGH>,
 			<113 IRQ_TYPE_LEVEL_HIGH>,
 			<114 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&PBCLK2>;
+		clocks = <&rootclk PB2CLK>;
 		status = "disabled";
 	};
 
@@ -235,7 +256,7 @@
 		interrupts = <145 IRQ_TYPE_LEVEL_HIGH>,
 			<146 IRQ_TYPE_LEVEL_HIGH>,
 			<147 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&PBCLK2>;
+		clocks = <&rootclk PB2CLK>;
 		status = "disabled";
 	};
 
@@ -245,7 +266,7 @@
 		interrupts = <157 IRQ_TYPE_LEVEL_HIGH>,
 			<158 IRQ_TYPE_LEVEL_HIGH>,
 			<159 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&PBCLK2>;
+		clocks = <&rootclk PB2CLK>;
 		status = "disabled";
 	};
 
@@ -255,7 +276,7 @@
 		interrupts = <170 IRQ_TYPE_LEVEL_HIGH>,
 			<171 IRQ_TYPE_LEVEL_HIGH>,
 			<172 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&PBCLK2>;
+		clocks = <&rootclk PB2CLK>;
 		status = "disabled";
 	};
 
@@ -265,7 +286,7 @@
 		interrupts = <179 IRQ_TYPE_LEVEL_HIGH>,
 			<180 IRQ_TYPE_LEVEL_HIGH>,
 			<181 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&PBCLK2>;
+		clocks = <&rootclk PB2CLK>;
 		status = "disabled";
 	};
 
@@ -275,7 +296,7 @@
 		interrupts = <188 IRQ_TYPE_LEVEL_HIGH>,
 			<189 IRQ_TYPE_LEVEL_HIGH>,
 			<190 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&PBCLK2>;
+		clocks = <&rootclk PB2CLK>;
 		status = "disabled";
 	};
 };
diff --git a/arch/mips/boot/dts/pic32/pic32mzda_sk.dts b/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
index 5d434a5..fc74010 100644
--- a/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
+++ b/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
@@ -95,8 +95,9 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_sdhc1>;
 	status = "okay";
-	assigned-clocks = <&REFCLKO2>,<&REFCLKO4>,<&REFCLKO5>;
-	assigned-clock-rates = <50000000>,<25000000>,<40000000>;
+	assigned-clocks = <&rootclk REF2CLK>, <&rootclk REF4CLK>,
+		<&rootclk REF5CLK>;
+	assigned-clock-rates = <50000000>, <25000000>, <40000000>;
 };
 
 &pic32_pinctrl {
diff --git a/arch/mips/boot/dts/qca/Makefile b/arch/mips/boot/dts/qca/Makefile
index 2d61455d..63a9ddf 100644
--- a/arch/mips/boot/dts/qca/Makefile
+++ b/arch/mips/boot/dts/qca/Makefile
@@ -1,8 +1,9 @@
 # All DTBs
 dtb-$(CONFIG_ATH79)			+= ar9132_tl_wr1043nd_v1.dtb
-
-# Select a DTB to build in the kernel
-obj-$(CONFIG_DTB_TL_WR1043ND_V1)	+= ar9132_tl_wr1043nd_v1.dtb.o
+dtb-$(CONFIG_ATH79)			+= ar9331_dpt_module.dtb
+dtb-$(CONFIG_ATH79)			+= ar9331_dragino_ms14.dtb
+dtb-$(CONFIG_ATH79)			+= ar9331_omega.dtb
+dtb-$(CONFIG_ATH79)			+= ar9331_tl_mr3020.dtb
 
 # Force kbuild to make empty built-in.o if necessary
 obj-				+= dummy.o
diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi
index 3c2ed9e..302f0a8 100644
--- a/arch/mips/boot/dts/qca/ar9132.dtsi
+++ b/arch/mips/boot/dts/qca/ar9132.dtsi
@@ -1,3 +1,5 @@
+#include <dt-bindings/clock/ath79-clk.h>
+
 / {
 	compatible = "qca,ar9132";
 
@@ -11,6 +13,7 @@
 		cpu@0 {
 			device_type = "cpu";
 			compatible = "mips,mips24Kc";
+			clocks = <&pll ATH79_CLK_CPU>;
 			reg = <0>;
 		};
 	};
@@ -52,12 +55,12 @@
 				#qca,ddr-wb-channel-cells = <1>;
 			};
 
-			uart@18020000 {
+			uart: uart@18020000 {
 				compatible = "ns8250";
 				reg = <0x18020000 0x20>;
 				interrupts = <3>;
 
-				clocks = <&pll 2>;
+				clocks = <&pll ATH79_CLK_AHB>;
 				clock-names = "uart";
 
 				reg-io-width = <4>;
@@ -94,13 +97,13 @@
 				clock-output-names = "cpu", "ddr", "ahb";
 			};
 
-			wdt@18060008 {
+			wdt: wdt@18060008 {
 				compatible = "qca,ar7130-wdt";
 				reg = <0x18060008 0x8>;
 
 				interrupts = <4>;
 
-				clocks = <&pll 2>;
+				clocks = <&pll ATH79_CLK_AHB>;
 				clock-names = "wdt";
 			};
 
@@ -125,7 +128,7 @@
 			};
 		};
 
-		usb@1b000100 {
+		usb: usb@1b000100 {
 			compatible = "qca,ar7100-ehci", "generic-ehci";
 			reg = <0x1b000100 0x100>;
 
@@ -140,11 +143,11 @@
 			status = "disabled";
 		};
 
-		spi@1f000000 {
+		spi: spi@1f000000 {
 			compatible = "qca,ar9132-spi", "qca,ar7100-spi";
 			reg = <0x1f000000 0x10>;
 
-			clocks = <&pll 2>;
+			clocks = <&pll ATH79_CLK_AHB>;
 			clock-names = "ahb";
 
 			status = "disabled";
diff --git a/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
index 4f1540e5f..3c3b7ce 100644
--- a/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
+++ b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
@@ -9,10 +9,6 @@
 	compatible = "tplink,tl-wr1043nd-v1", "qca,ar9132";
 	model = "TP-Link TL-WR1043ND Version 1";
 
-	alias {
-		serial0 = "/ahb/apb/uart@18020000";
-	};
-
 	memory@0 {
 		device_type = "memory";
 		reg = <0x0 0x2000000>;
@@ -24,55 +20,6 @@
 		clock-frequency = <40000000>;
 	};
 
-	ahb {
-		apb {
-			uart@18020000 {
-				status = "okay";
-			};
-
-			pll-controller@18050000 {
-				clocks = <&extosc>;
-			};
-		};
-
-		usb@1b000100 {
-			status = "okay";
-		};
-
-		spi@1f000000 {
-			status = "okay";
-			num-cs = <1>;
-
-			flash@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				compatible = "s25sl064a";
-				reg = <0>;
-				spi-max-frequency = <25000000>;
-
-				partition@0 {
-					label = "u-boot";
-					reg = <0x000000 0x020000>;
-				};
-
-				partition@1 {
-					label = "firmware";
-					reg = <0x020000 0x7D0000>;
-				};
-
-				partition@2 {
-					label = "art";
-					reg = <0x7F0000 0x010000>;
-					read-only;
-				};
-			};
-		};
-	};
-
-	usb-phy {
-		status = "okay";
-	};
-
 	gpio-keys {
 		compatible = "gpio-keys-polled";
 		#address-cells = <1>;
@@ -118,3 +65,48 @@
 		};
 	};
 };
+
+&uart {
+	status = "okay";
+};
+
+&pll {
+	clocks = <&extosc>;
+};
+
+&usb {
+	status = "okay";
+};
+
+&usb_phy {
+	status = "okay";
+};
+
+&spi {
+	status = "okay";
+	num-cs = <1>;
+
+	flash@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "s25sl064a";
+		reg = <0>;
+		spi-max-frequency = <25000000>;
+
+		partition@0 {
+			label = "u-boot";
+			reg = <0x000000 0x020000>;
+		};
+
+		partition@1 {
+			label = "firmware";
+			reg = <0x020000 0x7D0000>;
+		};
+
+		partition@2 {
+			label = "art";
+			reg = <0x7F0000 0x010000>;
+			read-only;
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi
new file mode 100644
index 0000000..cf47ed4
--- /dev/null
+++ b/arch/mips/boot/dts/qca/ar9331.dtsi
@@ -0,0 +1,155 @@
+#include <dt-bindings/clock/ath79-clk.h>
+
+/ {
+	compatible = "qca,ar9331";
+
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "mips,mips24Kc";
+			clocks = <&pll ATH79_CLK_CPU>;
+			reg = <0>;
+		};
+	};
+
+	cpuintc: interrupt-controller {
+		compatible = "qca,ar7100-cpu-intc";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+
+		qca,ddr-wb-channel-interrupts = <2>, <3>;
+		qca,ddr-wb-channels = <&ddr_ctrl 3>, <&ddr_ctrl 2>;
+	};
+
+	ref: ref {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+	};
+
+	ahb {
+		compatible = "simple-bus";
+		ranges;
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		interrupt-parent = <&cpuintc>;
+
+		apb {
+			compatible = "simple-bus";
+			ranges;
+
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			interrupt-parent = <&miscintc>;
+
+			ddr_ctrl: memory-controller@18000000 {
+				compatible = "qca,ar7240-ddr-controller";
+				reg = <0x18000000 0x100>;
+
+				#qca,ddr-wb-channel-cells = <1>;
+			};
+
+			uart: uart@18020000 {
+				compatible = "qca,ar9330-uart";
+				reg = <0x18020000 0x14>;
+
+				interrupts = <3>;
+
+				clocks = <&ref>;
+				clock-names = "uart";
+
+				status = "disabled";
+			};
+
+			gpio: gpio@18040000 {
+				compatible = "qca,ar7100-gpio";
+				reg = <0x18040000 0x34>;
+				interrupts = <2>;
+
+				ngpios = <30>;
+
+				gpio-controller;
+				#gpio-cells = <2>;
+
+				interrupt-controller;
+				#interrupt-cells = <2>;
+
+				status = "disabled";
+			};
+
+			pll: pll-controller@18050000 {
+				compatible = "qca,ar9330-pll";
+				reg = <0x18050000 0x100>;
+
+				clocks = <&ref>;
+				clock-names = "ref";
+
+				#clock-cells = <1>;
+			};
+
+			miscintc: interrupt-controller@18060010 {
+				compatible = "qca,ar7240-misc-intc";
+				reg = <0x18060010 0x4>;
+
+				interrupt-parent = <&cpuintc>;
+				interrupts = <6>;
+
+				interrupt-controller;
+				#interrupt-cells = <1>;
+			};
+
+			rst: reset-controller@1806001c {
+				compatible = "qca,ar7100-reset";
+				reg = <0x1806001c 0x4>;
+
+				#reset-cells = <1>;
+			};
+		};
+
+		usb: usb@1b000100 {
+			compatible = "chipidea,usb2";
+			reg = <0x1b000000 0x200>;
+
+			interrupts = <3>;
+			resets = <&rst 5>;
+
+			phy-names = "usb-phy";
+			phys = <&usb_phy>;
+
+			status = "disabled";
+		};
+
+		spi: spi@1f000000 {
+			compatible = "qca,ar7100-spi";
+			reg = <0x1f000000 0x10>;
+
+			clocks = <&pll ATH79_CLK_AHB>;
+			clock-names = "ahb";
+
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			status = "disabled";
+		};
+	};
+
+	usb_phy: usb-phy {
+		compatible = "qca,ar7100-usb-phy";
+
+		reset-names = "usb-phy", "usb-suspend-override";
+		resets = <&rst 4>, <&rst 3>;
+
+		#phy-cells = <0>;
+
+		status = "disabled";
+	};
+};
diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
new file mode 100644
index 0000000..98e7450
--- /dev/null
+++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
@@ -0,0 +1,78 @@
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+#include "ar9331.dtsi"
+
+/ {
+	model = "DPTechnics DPT-Module";
+	compatible = "dptechnics,dpt-module";
+
+	aliases {
+		serial0 = &uart;
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x4000000>;
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		system {
+			label = "dpt-module:green:system";
+			gpios = <&gpio 27 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+	};
+
+	gpio-keys-polled {
+		compatible = "gpio-keys-polled";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		poll-interval = <100>;
+
+		button@0 {
+			label = "reset";
+			linux,code = <KEY_RESTART>;
+			gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+		};
+	};
+};
+
+&ref {
+	clock-frequency = <25000000>;
+};
+
+&uart {
+	status = "okay";
+};
+
+&gpio {
+	status = "okay";
+};
+
+&usb {
+	dr_mode = "host";
+	status = "okay";
+};
+
+&usb_phy {
+	status = "okay";
+};
+
+&spi {
+	num-chipselects = <1>;
+	status = "okay";
+
+	/* Winbond 25Q128FVSG SPI flash */
+	spiflash: w25q128@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "winbond,w25q128", "jedec,spi-nor";
+		spi-max-frequency = <104000000>;
+		reg = <0>;
+	};
+};
diff --git a/arch/mips/boot/dts/qca/ar9331_dragino_ms14.dts b/arch/mips/boot/dts/qca/ar9331_dragino_ms14.dts
new file mode 100644
index 0000000..56f8320
--- /dev/null
+++ b/arch/mips/boot/dts/qca/ar9331_dragino_ms14.dts
@@ -0,0 +1,102 @@
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+#include "ar9331.dtsi"
+
+/ {
+	model = "Dragino MS14 (Dragino 2)";
+	compatible = "dragino,ms14";
+
+	aliases {
+		serial0 = &uart;
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x4000000>;
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		wlan {
+			label = "dragino2:red:wlan";
+			gpios = <&gpio 0 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		lan {
+			label = "dragino2:red:lan";
+			gpios = <&gpio 13 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+
+		wan {
+			label = "dragino2:red:wan";
+			gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+
+		system {
+			label = "dragino2:red:system";
+			gpios = <&gpio 28 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+	};
+
+	gpio-keys-polled {
+		compatible = "gpio-keys-polled";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		poll-interval = <100>;
+
+		button@0 {
+			label = "jumpstart";
+			linux,code = <KEY_WPS_BUTTON>;
+			gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+		};
+
+		button@1 {
+			label = "reset";
+			linux,code = <KEY_RESTART>;
+			gpios = <&gpio 12 GPIO_ACTIVE_LOW>;
+		};
+	};
+};
+
+&ref {
+	clock-frequency = <25000000>;
+};
+
+&uart {
+	status = "okay";
+};
+
+&gpio {
+	status = "okay";
+};
+
+&usb {
+	dr_mode = "host";
+	status = "okay";
+};
+
+&usb_phy {
+	status = "okay";
+};
+
+&spi {
+	num-chipselects = <1>;
+	status = "okay";
+
+	/* Winbond 25Q128BVFG SPI flash */
+	spiflash: w25q128@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "winbond,w25q128", "jedec,spi-nor";
+		spi-max-frequency = <104000000>;
+		reg = <0>;
+	};
+};
diff --git a/arch/mips/boot/dts/qca/ar9331_omega.dts b/arch/mips/boot/dts/qca/ar9331_omega.dts
new file mode 100644
index 0000000..b2be3b0
--- /dev/null
+++ b/arch/mips/boot/dts/qca/ar9331_omega.dts
@@ -0,0 +1,78 @@
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+#include "ar9331.dtsi"
+
+/ {
+	model = "Onion Omega";
+	compatible = "onion,omega";
+
+	aliases {
+		serial0 = &uart;
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x4000000>;
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		system {
+			label = "onion:amber:system";
+			gpios = <&gpio 27 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+	};
+
+	gpio-keys-polled {
+		compatible = "gpio-keys-polled";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		poll-interval = <100>;
+
+		button@0 {
+			label = "reset";
+			linux,code = <KEY_RESTART>;
+			gpios = <&gpio 11 GPIO_ACTIVE_HIGH>;
+		};
+	};
+};
+
+&ref {
+	clock-frequency = <25000000>;
+};
+
+&uart {
+	status = "okay";
+};
+
+&gpio {
+	status = "okay";
+};
+
+&usb {
+	dr_mode = "host";
+	status = "okay";
+};
+
+&usb_phy {
+	status = "okay";
+};
+
+&spi {
+	num-chipselects = <1>;
+	status = "okay";
+
+	/* Winbond 25Q128FVSG SPI flash */
+	spiflash: w25q128@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "winbond,w25q128", "jedec,spi-nor";
+		spi-max-frequency = <104000000>;
+		reg = <0>;
+	};
+};
diff --git a/arch/mips/boot/dts/qca/ar9331_tl_mr3020.dts b/arch/mips/boot/dts/qca/ar9331_tl_mr3020.dts
new file mode 100644
index 0000000..919cf3b
--- /dev/null
+++ b/arch/mips/boot/dts/qca/ar9331_tl_mr3020.dts
@@ -0,0 +1,118 @@
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+#include "ar9331.dtsi"
+
+/ {
+	model = "TP-Link TL-MR3020";
+	compatible = "tplink,tl-mr3020";
+
+	aliases {
+		serial0 = &uart;
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x2000000>;
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		wlan {
+			label = "tp-link:green:wlan";
+			gpios = <&gpio 0 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+
+		lan {
+			label = "tp-link:green:lan";
+			gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+
+		wps {
+			label = "tp-link:green:wps";
+			gpios = <&gpio 26 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+
+		led3g {
+			label = "tp-link:green:3g";
+			gpios = <&gpio 27 GPIO_ACTIVE_LOW>;
+			default-state = "off";
+		};
+	};
+
+	gpio-keys-polled {
+		compatible = "gpio-keys-polled";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		poll-interval = <100>;
+
+		button@0 {
+			label = "wps";
+			linux,code = <KEY_WPS_BUTTON>;
+			gpios = <&gpio 11 GPIO_ACTIVE_HIGH>;
+		};
+
+		button@1 {
+			label = "sw1";
+			linux,code = <BTN_0>;
+			gpios = <&gpio 18 GPIO_ACTIVE_HIGH>;
+		};
+
+		button@2 {
+			label = "sw2";
+			linux,code = <BTN_1>;
+			gpios = <&gpio 20 GPIO_ACTIVE_HIGH>;
+		};
+	};
+
+	reg_usb_vbus: reg_usb_vbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usb_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpio 8 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+};
+
+&ref {
+	clock-frequency = <25000000>;
+};
+
+&uart {
+	status = "okay";
+};
+
+&gpio {
+	status = "okay";
+};
+
+&usb {
+	dr_mode = "host";
+	vbus-supply = <&reg_usb_vbus>;
+	status = "okay";
+};
+
+&usb_phy {
+	status = "okay";
+};
+
+&spi {
+	num-chipselects = <1>;
+	status = "okay";
+
+	/* Spansion S25FL032PIF SPI flash */
+	spiflash: s25sl032p@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "spansion,s25sl032p", "jedec,spi-nor";
+		spi-max-frequency = <104000000>;
+		reg = <0>;
+	};
+};
diff --git a/arch/mips/boot/tools/.gitignore b/arch/mips/boot/tools/.gitignore
new file mode 100644
index 0000000..be0ed06
--- /dev/null
+++ b/arch/mips/boot/tools/.gitignore
@@ -0,0 +1 @@
+relocs
diff --git a/arch/mips/boot/tools/Makefile b/arch/mips/boot/tools/Makefile
new file mode 100644
index 0000000..d232a68
--- /dev/null
+++ b/arch/mips/boot/tools/Makefile
@@ -0,0 +1,8 @@
+
+hostprogs-y	+= relocs
+relocs-objs	+= relocs_32.o
+relocs-objs	+= relocs_64.o
+relocs-objs	+= relocs_main.o
+PHONY += relocs
+relocs: $(obj)/relocs
+	@:
diff --git a/arch/mips/boot/tools/relocs.c b/arch/mips/boot/tools/relocs.c
new file mode 100644
index 0000000..b9cbf78
--- /dev/null
+++ b/arch/mips/boot/tools/relocs.c
@@ -0,0 +1,680 @@
+/* This is included from relocs_32/64.c */
+
+#define ElfW(type)		_ElfW(ELF_BITS, type)
+#define _ElfW(bits, type)	__ElfW(bits, type)
+#define __ElfW(bits, type)	Elf##bits##_##type
+
+#define Elf_Addr		ElfW(Addr)
+#define Elf_Ehdr		ElfW(Ehdr)
+#define Elf_Phdr		ElfW(Phdr)
+#define Elf_Shdr		ElfW(Shdr)
+#define Elf_Sym			ElfW(Sym)
+
+static Elf_Ehdr ehdr;
+
+struct relocs {
+	uint32_t	*offset;	/* packed entries, filled by add_reloc() */
+	unsigned long	count;		/* entries currently stored */
+	unsigned long	size;		/* entries allocated in @offset */
+};
+
+static struct relocs relocs;
+
+struct section {
+	Elf_Shdr       shdr;		/* header, converted by read_shdrs() */
+	struct section *link;		/* &secs[sh_link] when that is in range */
+	Elf_Sym        *symtab;		/* loaded by read_symtabs() (SHT_SYMTAB) */
+	Elf_Rel        *reltab;		/* loaded by read_relocs() (SHT_REL_TYPE) */
+	char           *strtab;		/* loaded by read_strtabs() (SHT_STRTAB) */
+	long           shdr_offset;	/* ftell() position of the header */
+};
+static struct section *secs;
+
+/*
+ * Symbols matching this regex should never be relocated.
+ */
+static const char * const regex_sym_kernel = "^(__crc_)";
+
+static regex_t sym_regex_c;
+
+static int regex_skip_reloc(const char *sym_name)
+{
+	return !regexec(&sym_regex_c, sym_name, 0, NULL, 0);
+}
+
+/* Compile the skip-list regex once; a regcomp() failure is fatal. */
+static void regex_init(void)
+{
+	char msg[128];
+	int rc = regcomp(&sym_regex_c, regex_sym_kernel,
+			 REG_EXTENDED | REG_NOSUB);
+
+	if (!rc)
+		return;
+
+	regerror(rc, &sym_regex_c, msg, sizeof(msg));
+	die("%s", msg);
+}
+
+/* Map a MIPS relocation number to its symbolic name for diagnostics. */
+static const char *rel_type(unsigned type)
+{
+	static const char * const type_name[] = {
+#define REL_TYPE(X)[X] = #X
+		REL_TYPE(R_MIPS_NONE),
+		REL_TYPE(R_MIPS_16),
+		REL_TYPE(R_MIPS_32),
+		REL_TYPE(R_MIPS_REL32),
+		REL_TYPE(R_MIPS_26),
+		REL_TYPE(R_MIPS_HI16),
+		REL_TYPE(R_MIPS_LO16),
+		REL_TYPE(R_MIPS_GPREL16),
+		REL_TYPE(R_MIPS_LITERAL),
+		REL_TYPE(R_MIPS_GOT16),
+		REL_TYPE(R_MIPS_PC16),
+		REL_TYPE(R_MIPS_CALL16),
+		REL_TYPE(R_MIPS_GPREL32),
+		REL_TYPE(R_MIPS_64),
+		REL_TYPE(R_MIPS_HIGHER),
+		REL_TYPE(R_MIPS_HIGHEST),
+		REL_TYPE(R_MIPS_PC21_S2),
+		REL_TYPE(R_MIPS_PC26_S2),
+#undef REL_TYPE
+	};
+
+	if (type >= ARRAY_SIZE(type_name) || !type_name[type])
+		return "unknown type rel type name";
+	return type_name[type];
+}
+
+/* Name of section @shndx; out-of-range indices get a fixed placeholder. */
+static const char *sec_name(unsigned shndx)
+{
+	const char *shstrtab = secs[ehdr.e_shstrndx].strtab;
+
+	if (shndx < ehdr.e_shnum)
+		return shstrtab + secs[shndx].shdr.sh_name;
+
+	switch (shndx) {
+	case SHN_ABS:
+		return "ABSOLUTE";
+	case SHN_COMMON:
+		return "COMMON";
+	}
+	return "<noname>";
+}
+
+/* Return the first section called @secname, or NULL when none matches. */
+static struct section *sec_lookup(const char *secname)
+{
+	int idx = 0;
+
+	while (idx < ehdr.e_shnum && strcmp(secname, sec_name(idx)) != 0)
+		idx++;
+
+	return idx < ehdr.e_shnum ? &secs[idx] : NULL;
+}
+
+/*
+ * Name of @sym in @sym_strtab; a symbol without a name is reported under
+ * the name of its section.
+ */
+static const char *sym_name(const char *sym_strtab, Elf_Sym *sym)
+{
+	if (!sym->st_name)
+		return sec_name(sym->st_shndx);
+	return sym_strtab + sym->st_name;
+}
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#define le64_to_cpu(val) (val)
+#define be16_to_cpu(val) bswap_16(val)
+#define be32_to_cpu(val) bswap_32(val)
+#define be64_to_cpu(val) bswap_64(val)
+
+#define cpu_to_le16(val) (val)
+#define cpu_to_le32(val) (val)
+#define cpu_to_le64(val) (val)
+#define cpu_to_be16(val) bswap_16(val)
+#define cpu_to_be32(val) bswap_32(val)
+#define cpu_to_be64(val) bswap_64(val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#define le64_to_cpu(val) bswap_64(val)
+#define be16_to_cpu(val) (val)
+#define be32_to_cpu(val) (val)
+#define be64_to_cpu(val) (val)
+
+#define cpu_to_le16(val) bswap_16(val)
+#define cpu_to_le32(val) bswap_32(val)
+#define cpu_to_le64(val) bswap_64(val)
+#define cpu_to_be16(val) (val)
+#define cpu_to_be32(val) (val)
+#define cpu_to_be64(val) (val)
+#endif
+
+/* Convert a 16-bit value read from the ELF file to host byte order. */
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+	int lsb_file = ehdr.e_ident[EI_DATA] == ELFDATA2LSB;
+
+	return lsb_file ? le16_to_cpu(val) : be16_to_cpu(val);
+}
+
+/* Convert a 32-bit value read from the ELF file to host byte order. */
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+	int lsb_file = ehdr.e_ident[EI_DATA] == ELFDATA2LSB;
+
+	return lsb_file ? le32_to_cpu(val) : be32_to_cpu(val);
+}
+
+/* Convert a 32-bit host value to the byte order of the ELF file. */
+static uint32_t cpu_to_elf32(uint32_t val)
+{
+	int lsb_file = ehdr.e_ident[EI_DATA] == ELFDATA2LSB;
+
+	return lsb_file ? cpu_to_le32(val) : cpu_to_be32(val);
+}
+
+#define elf_half_to_cpu(x)	elf16_to_cpu(x)
+#define elf_word_to_cpu(x)	elf32_to_cpu(x)
+
+#if ELF_BITS == 64
+/* Convert a 64-bit value read from the ELF file to host byte order. */
+static uint64_t elf64_to_cpu(uint64_t val)
+{
+	int lsb_file = ehdr.e_ident[EI_DATA] == ELFDATA2LSB;
+
+	return lsb_file ? le64_to_cpu(val) : be64_to_cpu(val);
+}
+#define elf_addr_to_cpu(x)	elf64_to_cpu(x)
+#define elf_off_to_cpu(x)	elf64_to_cpu(x)
+#define elf_xword_to_cpu(x)	elf64_to_cpu(x)
+#else
+#define elf_addr_to_cpu(x)	elf32_to_cpu(x)
+#define elf_off_to_cpu(x)	elf32_to_cpu(x)
+#define elf_xword_to_cpu(x)	elf32_to_cpu(x)
+#endif
+
+static void read_ehdr(FILE *fp)
+{	/* reads into the file-scope ehdr; die() exits on any failed check */
+	if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+		die("Cannot read ELF header: %s\n", strerror(errno));
+
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
+		die("No ELF magic\n");
+
+	if (ehdr.e_ident[EI_CLASS] != ELF_CLASS) /* from relocs_32.c/_64.c */
+		die("Not a %d bit executable\n", ELF_BITS);
+
+	if ((ehdr.e_ident[EI_DATA] != ELFDATA2LSB) &&
+	    (ehdr.e_ident[EI_DATA] != ELFDATA2MSB))
+		die("Unknown ELF Endianness\n");
+
+	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT)
+		die("Unknown ELF version\n");
+
+	/* Convert the multi-byte fields to native endian, then check them */
+	ehdr.e_type      = elf_half_to_cpu(ehdr.e_type);
+	ehdr.e_machine   = elf_half_to_cpu(ehdr.e_machine);
+	ehdr.e_version   = elf_word_to_cpu(ehdr.e_version);
+	ehdr.e_entry     = elf_addr_to_cpu(ehdr.e_entry);
+	ehdr.e_phoff     = elf_off_to_cpu(ehdr.e_phoff);
+	ehdr.e_shoff     = elf_off_to_cpu(ehdr.e_shoff);
+	ehdr.e_flags     = elf_word_to_cpu(ehdr.e_flags);
+	ehdr.e_ehsize    = elf_half_to_cpu(ehdr.e_ehsize);
+	ehdr.e_phentsize = elf_half_to_cpu(ehdr.e_phentsize);
+	ehdr.e_phnum     = elf_half_to_cpu(ehdr.e_phnum);
+	ehdr.e_shentsize = elf_half_to_cpu(ehdr.e_shentsize);
+	ehdr.e_shnum     = elf_half_to_cpu(ehdr.e_shnum);
+	ehdr.e_shstrndx  = elf_half_to_cpu(ehdr.e_shstrndx);
+
+	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN))
+		die("Unsupported ELF header type\n");
+
+	if (ehdr.e_machine != ELF_MACHINE)
+		die("Not for %s\n", ELF_MACHINE_NAME);
+
+	if (ehdr.e_version != EV_CURRENT)
+		die("Unknown ELF version\n");
+
+	if (ehdr.e_ehsize != sizeof(Elf_Ehdr))
+		die("Bad Elf header size\n");
+
+	if (ehdr.e_phentsize != sizeof(Elf_Phdr))
+		die("Bad program header entry\n");
+
+	if (ehdr.e_shentsize != sizeof(Elf_Shdr))
+		die("Bad section header entry\n");
+
+	if (ehdr.e_shstrndx >= ehdr.e_shnum)	/* sec_name() indexes secs[] */
+		die("String table index out of bounds\n");
+}
+
+/* Read all section headers into secs[], converted to host byte order. */
+static void read_shdrs(FILE *fp)
+{
+	int i;
+	Elf_Shdr shdr;
+
+	secs = calloc(ehdr.e_shnum, sizeof(struct section));
+	if (!secs)
+		die("Unable to allocate %d section headers\n", ehdr.e_shnum);
+	/* e_shoff is 64 bits wide for ELF64: print it as the long fseek() got */
+	if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+		die("Seek to %ld failed: %s\n", (long)ehdr.e_shoff, strerror(errno));
+
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+
+		sec->shdr_offset = ftell(fp);
+		if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot read ELF section headers %d/%d: %s\n",
+			    i, ehdr.e_shnum, strerror(errno));
+		sec->shdr.sh_name      = elf_word_to_cpu(shdr.sh_name);
+		sec->shdr.sh_type      = elf_word_to_cpu(shdr.sh_type);
+		sec->shdr.sh_flags     = elf_xword_to_cpu(shdr.sh_flags);
+		sec->shdr.sh_addr      = elf_addr_to_cpu(shdr.sh_addr);
+		sec->shdr.sh_offset    = elf_off_to_cpu(shdr.sh_offset);
+		sec->shdr.sh_size      = elf_xword_to_cpu(shdr.sh_size);
+		sec->shdr.sh_link      = elf_word_to_cpu(shdr.sh_link);
+		sec->shdr.sh_info      = elf_word_to_cpu(shdr.sh_info);
+		sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign);
+		sec->shdr.sh_entsize   = elf_xword_to_cpu(shdr.sh_entsize);
+		if (sec->shdr.sh_link < ehdr.e_shnum)
+			sec->link = &secs[sec->shdr.sh_link];
+	}
+}
+
+/* Load the contents of every SHT_STRTAB section into sec->strtab. */
+static void read_strtabs(FILE *fp)
+{
+	int i;
+
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+
+		if (sec->shdr.sh_type != SHT_STRTAB)
+			continue;
+
+		sec->strtab = malloc(sec->shdr.sh_size);
+		if (!sec->strtab)
+			die("malloc of %zu bytes for strtab failed\n",
+			    (size_t)sec->shdr.sh_size);
+		if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %ld failed: %s\n",
+			    (long)sec->shdr.sh_offset, strerror(errno));
+
+		if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) !=
+		    sec->shdr.sh_size)
+			die("Cannot read string table: %s\n", strerror(errno));
+	}
+}
+
+/* Load each SHT_SYMTAB section and convert its symbols to host byte order. */
+static void read_symtabs(FILE *fp)
+{
+	int i, j;
+
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+		if (sec->shdr.sh_type != SHT_SYMTAB)
+			continue;
+
+		sec->symtab = malloc(sec->shdr.sh_size);
+		if (!sec->symtab)
+			die("malloc of %zu bytes for symtab failed\n",
+			    (size_t)sec->shdr.sh_size);
+		if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %ld failed: %s\n",
+			    (long)sec->shdr.sh_offset, strerror(errno));
+
+		if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) !=
+		    sec->shdr.sh_size)
+			die("Cannot read symbol table: %s\n", strerror(errno));
+
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Sym); j++) {
+			Elf_Sym *sym = &sec->symtab[j];
+
+			sym->st_name  = elf_word_to_cpu(sym->st_name);
+			sym->st_value = elf_addr_to_cpu(sym->st_value);
+			sym->st_size  = elf_xword_to_cpu(sym->st_size);
+			sym->st_shndx = elf_half_to_cpu(sym->st_shndx);
+		}
+	}
+}
+
+/* Load every relocation table, with offsets rebased to the .text address. */
+static void read_relocs(FILE *fp)
+{
+	static unsigned long base = 0;
+	int i, j;
+
+	if (!base) {
+		struct section *sec = sec_lookup(".text");
+
+		if (!sec)
+			die("Could not find .text section\n");
+
+		base = sec->shdr.sh_addr;
+	}
+
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+
+		sec->reltab = malloc(sec->shdr.sh_size);
+		if (!sec->reltab)
+			die("malloc of %zu bytes for relocs failed\n",
+			    (size_t)sec->shdr.sh_size);
+
+		if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %ld failed: %s\n",
+			    (long)sec->shdr.sh_offset, strerror(errno));
+
+		if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) !=
+		    sec->shdr.sh_size)
+			die("Cannot read relocation table: %s\n", strerror(errno));
+
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) {
+			Elf_Rel *rel = &sec->reltab[j];
+
+			rel->r_offset = elf_addr_to_cpu(rel->r_offset);
+			/* Set offset into kernel image */
+			rel->r_offset -= base;
+#if (ELF_BITS == 32)
+			rel->r_info   = elf_xword_to_cpu(rel->r_info);
+#else
+			/* Convert MIPS64 RELA format - only the symbol
+			 * index needs converting to native endianness
+			 */
+			ELF_R_SYM(rel->r_info) = elf32_to_cpu(ELF_R_SYM(rel->r_info));
+#endif
+#if (SHT_REL_TYPE == SHT_RELA)
+			rel->r_addend = elf_xword_to_cpu(rel->r_addend);
+#endif
+		}
+	}
+}
+
+/* Zero sh_size in the on-disk header of every relocation section. */
+static void remove_relocs(FILE *fp)
+{
+	int i;
+	Elf_Shdr shdr;
+
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+
+		if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0)
+			die("Seek to %ld failed: %s\n",
+			    sec->shdr_offset, strerror(errno));
+
+		if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot read ELF section headers %d/%d: %s\n",
+			    i, ehdr.e_shnum, strerror(errno));
+
+		/* Size 0 effectively removes the section.  Needed because
+		 * objcopy lacks relocation support for 64bit -> 32bit elf.
+		 */
+		shdr.sh_size = 0;
+
+		if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0)
+			die("Seek to %ld failed: %s\n",
+			    sec->shdr_offset, strerror(errno));
+
+		if (fwrite(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot write ELF section headers %d/%d: %s\n",
+			    i, ehdr.e_shnum, strerror(errno));
+	}
+}
+
+/* Append one packed entry to @r, growing the array 50000 entries at a time. */
+static void add_reloc(struct relocs *r, uint32_t offset, unsigned type)
+{
+	/* Relocation representation in binary table:
+	 * |76543210|76543210|76543210|76543210|
+	 * |  Type  |  offset from _text >> 2  |
+	 */
+	uint32_t word_off = offset >> 2;
+
+	if (word_off > 0x00FFFFFF)
+		die("Kernel image exceeds maximum size for relocation!\n");
+
+	if (r->count == r->size) {
+		unsigned long grown = r->size + 50000;
+		void *mem = realloc(r->offset, grown * sizeof(r->offset[0]));
+
+		if (!mem)
+			die("realloc failed\n");
+
+		r->offset = mem;
+		r->size = grown;
+	}
+	r->offset[r->count++] = word_off | ((type & 0xFF) << 24);
+}
+
+/* Invoke @process for each relocation applying to an SHF_ALLOC section. */
+static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
+			Elf_Sym *sym, const char *symname))
+{
+	int i, j;
+
+	for (i = 0; i < ehdr.e_shnum; i++) {
+		struct section *sec = &secs[i];
+		struct section *symsec, *target;
+		Elf_Sym *symbols;
+		char *strings;
+
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+
+		target = &secs[sec->shdr.sh_info];
+		if (!(target->shdr.sh_flags & SHF_ALLOC))
+			continue;
+
+		/* sh_link chain: relocation section -> symtab -> strtab */
+		symsec = sec->link;
+		symbols = symsec->symtab;
+		strings = symsec->link->strtab;
+
+		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) {
+			Elf_Rel *rel = &sec->reltab[j];
+			Elf_Sym *sym = &symbols[ELF_R_SYM(rel->r_info)];
+
+			process(sec, rel, sym, sym_name(strings, sym));
+		}
+	}
+}
+
+/* walk_relocs() callback: record the relocations that need a table entry. */
+static int do_reloc(struct section *sec, Elf_Rel *rel, Elf_Sym *sym,
+		      const char *symname)
+{
+	unsigned r_type = ELF_R_TYPE(rel->r_info);
+	unsigned bind = ELF_ST_BIND(sym->st_info);
+
+	/* Don't relocate weak symbols without a target */
+	if (bind == STB_WEAK && sym->st_value == 0)
+		return 0;
+
+	if (regex_skip_reloc(symname))
+		return 0;
+
+	switch (r_type) {
+	case R_MIPS_64:
+	case R_MIPS_32:
+	case R_MIPS_26:
+	case R_MIPS_HI16:
+		add_reloc(&relocs, rel->r_offset, r_type);
+		break;
+
+	case R_MIPS_NONE:
+	case R_MIPS_REL32:
+	case R_MIPS_PC16:
+	case R_MIPS_PC21_S2:
+	case R_MIPS_PC26_S2:
+		/*
+		 * NONE can be ignored and PC relative relocations don't
+		 * need to be adjusted.
+		 */
+	case R_MIPS_HIGHEST:
+	case R_MIPS_HIGHER:
+		/* We support relocating within the same 4Gb segment only,
+		 * thus leaving the top 32bits unchanged
+		 */
+	case R_MIPS_LO16:
+		/* We support relocating by 64k jumps only
+		 * thus leaving the bottom 16bits unchanged
+		 */
+		break;
+
+	default:
+		die("Unsupported relocation type: %s (%d)\n",
+		    rel_type(r_type), r_type);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Write @v to @f in the ELF file's byte order; returns fwrite()'s result.
+ */
+static int write_reloc_as_bin(uint32_t v, FILE *f)
+{
+	uint32_t raw = cpu_to_elf32(v);
+
+	return fwrite(&raw, 1, sizeof(raw), f);
+}
+
+/*
+ * Emit @v as a ".long" directive.  Returns sizeof(uint32_t) on success
+ * or fprintf()'s negative result on failure.
+ */
+static int write_reloc_as_text(uint32_t v, FILE *f)
+{
+	int res = fprintf(f, "\t.long 0x%08"PRIx32"\n", v);
+
+	return res < 0 ? res : (int)sizeof(uint32_t);
+}
+
+/*
+ * Collect the relocations and emit the table: as gas text or raw binary on
+ * stdout, or written in place over the .data.reloc section of @outf.
+ */
+static void emit_relocs(int as_text, int as_bin, FILE *outf)
+{
+	int (*write_reloc)(uint32_t, FILE *) = write_reloc_as_bin;
+	struct section *sec_reloc = sec_lookup(".data.reloc");
+	unsigned long i, size = 0;
+
+	if (!sec_reloc)
+		die("Could not find relocation section\n");
+
+	/* Collect up the relocations */
+	walk_relocs(do_reloc);
+
+	/* One word per relocation plus a zero stop word, if there are any */
+	if (relocs.count)
+		size = (relocs.count + 1) * sizeof(uint32_t);
+
+	/* Refuse to write anything unless the whole table fits */
+	if (size > sec_reloc->shdr.sh_size)
+		/* Die, but suggest a value for CONFIG_RELOCATION_TABLE_SIZE
+		 * which will fix this problem and allow a bit of headroom
+		 * if more kernel features are enabled
+		 */
+		die("Relocations overflow available space!\n" \
+		    "Please adjust CONFIG_RELOCATION_TABLE_SIZE " \
+		    "to at least 0x%08lx\n", (size + 0x1000) & ~0xFFFUL);
+
+	if (as_text) {
+		/* Print the relocations in a form suitable that
+		 * gas will like.
+		 */
+		printf(".section \".data.reloc\",\"a\"\n");
+		printf(".balign 4\n");
+		/* Output text to stdout */
+		write_reloc = write_reloc_as_text;
+		outf = stdout;
+	} else if (as_bin) {
+		/* Output raw binary to stdout */
+		outf = stdout;
+	} else if (fseek(outf, sec_reloc->shdr.sh_offset, SEEK_SET) < 0) {
+		/* Neither flag: the table is written into the image itself */
+		die("Seek to %ld failed: %s\n",
+		    (long)sec_reloc->shdr.sh_offset, strerror(errno));
+	}
+
+	/* Print the relocations; each successful write is one 32-bit word */
+	for (i = 0; i < relocs.count; i++)
+		if (write_reloc(relocs.offset[i], outf) != sizeof(uint32_t))
+			die("Cannot write relocs: %s\n", strerror(errno));
+
+	/* Print a stop, but only if we've actually written some relocs */
+	if (size && write_reloc(0, outf) != sizeof(uint32_t))
+		die("Cannot write relocs: %s\n", strerror(errno));
+}
+
+/*
+ * As an aid to debugging problems with different linkers
+ * print summary information about the relocs.
+ * Since different linkers tend to emit the sections in
+ * different orders we use the section names in the output.
+ */
+static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
+				const char *symname)
+{
+	const char *applies_to = sec_name(sec->shdr.sh_info);
+	const char *defined_in = sec_name(sym->st_shndx);
+	unsigned int offset = rel->r_offset;
+
+	printf("%16s  0x%08x  %16s  %40s  %16s\n", applies_to, offset,
+		rel_type(ELF_R_TYPE(rel->r_info)), symname, defined_in);
+	return 0;
+}
+
+/*
+ * Print the column headings, then one line per relocation.
+ */
+static void print_reloc_info(void)
+{
+	printf("%16s  %10s  %16s  %40s  %16s\n",
+		"reloc section", "offset", "reloc type",
+		"symbol", "symbol section");
+	walk_relocs(do_reloc_info);
+}
+
+#if ELF_BITS == 64
+# define process process_64
+#else
+# define process process_32
+#endif
+
+void process(FILE *fp, int as_text, int as_bin,
+	     int show_reloc_info, int keep_relocs)
+{	/* 'process' is #defined to process_32 or process_64 per ELF_BITS */
+	regex_init();
+	read_ehdr(fp);
+	read_shdrs(fp);
+	read_strtabs(fp);
+	read_symtabs(fp);
+	read_relocs(fp);
+	if (show_reloc_info) {
+		print_reloc_info();
+		return;			/* dump only: nothing is written to fp */
+	}
+	emit_relocs(as_text, as_bin, fp);
+	if (!keep_relocs)
+		remove_relocs(fp);	/* zeroes sh_size of the reloc sections */
+}
diff --git a/arch/mips/boot/tools/relocs.h b/arch/mips/boot/tools/relocs.h
new file mode 100644
index 0000000..3cf676f
--- /dev/null
+++ b/arch/mips/boot/tools/relocs.h
@@ -0,0 +1,45 @@
+#ifndef RELOCS_H
+#define RELOCS_H
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+#include <regex.h>
+
+void die(char *fmt, ...);
+
+/*
+ * Introduced for MIPSr6
+ */
+#ifndef R_MIPS_PC21_S2
+#define R_MIPS_PC21_S2		60
+#endif
+
+#ifndef R_MIPS_PC26_S2
+#define R_MIPS_PC26_S2		61
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+enum symtype {
+	S_ABS,
+	S_REL,
+	S_SEG,
+	S_LIN,
+	S_NSYMTYPES
+};
+
+void process_32(FILE *fp, int as_text, int as_bin,
+		int show_reloc_info, int keep_relocs);
+void process_64(FILE *fp, int as_text, int as_bin,
+		int show_reloc_info, int keep_relocs);
+#endif /* RELOCS_H */
diff --git a/arch/mips/boot/tools/relocs_32.c b/arch/mips/boot/tools/relocs_32.c
new file mode 100644
index 0000000..915bdc0
--- /dev/null
+++ b/arch/mips/boot/tools/relocs_32.c
@@ -0,0 +1,17 @@
+#include "relocs.h"
+
+#define ELF_BITS 32
+
+#define ELF_MACHINE		EM_MIPS
+#define ELF_MACHINE_NAME	"MIPS"
+#define SHT_REL_TYPE		SHT_REL
+#define Elf_Rel			ElfW(Rel)
+
+#define ELF_CLASS		ELFCLASS32
+#define ELF_R_SYM(val)		ELF32_R_SYM(val)
+#define ELF_R_TYPE(val)		ELF32_R_TYPE(val)
+#define ELF_ST_TYPE(o)		ELF32_ST_TYPE(o)
+#define ELF_ST_BIND(o)		ELF32_ST_BIND(o)
+#define ELF_ST_VISIBILITY(o)	ELF32_ST_VISIBILITY(o)
+
+#include "relocs.c"
diff --git a/arch/mips/boot/tools/relocs_64.c b/arch/mips/boot/tools/relocs_64.c
new file mode 100644
index 0000000..b671b5e
--- /dev/null
+++ b/arch/mips/boot/tools/relocs_64.c
@@ -0,0 +1,27 @@
+#include "relocs.h"
+
+#define ELF_BITS 64
+
+#define ELF_MACHINE             EM_MIPS
+#define ELF_MACHINE_NAME        "MIPS64"
+#define SHT_REL_TYPE            SHT_RELA
+#define Elf_Rel                 Elf64_Rela
+
+typedef uint8_t Elf64_Byte;
+
+typedef struct {
+	Elf64_Word r_sym;	/* Symbol index.  */
+	Elf64_Byte r_ssym;	/* Special symbol.  */
+	Elf64_Byte r_type3;	/* Third relocation.  */
+	Elf64_Byte r_type2;	/* Second relocation.  */
+	Elf64_Byte r_type;	/* First relocation.  */
+} Elf64_Mips_Rela;
+
+#define ELF_CLASS               ELFCLASS64
+#define ELF_R_SYM(val)          (((Elf64_Mips_Rela *)(&val))->r_sym)
+#define ELF_R_TYPE(val)         (((Elf64_Mips_Rela *)(&val))->r_type)
+#define ELF_ST_TYPE(o)          ELF64_ST_TYPE(o)
+#define ELF_ST_BIND(o)          ELF64_ST_BIND(o)
+#define ELF_ST_VISIBILITY(o)    ELF64_ST_VISIBILITY(o)
+
+#include "relocs.c"
diff --git a/arch/mips/boot/tools/relocs_main.c b/arch/mips/boot/tools/relocs_main.c
new file mode 100644
index 0000000..d8fe234
--- /dev/null
+++ b/arch/mips/boot/tools/relocs_main.c
@@ -0,0 +1,84 @@
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <endian.h>
+#include <elf.h>
+
+#include "relocs.h"
+
+void die(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(1);
+}
+
+static void usage(void)
+{
+	die("relocs [--reloc-info|--text|--bin|--keep] vmlinux\n");
+}
+
+/* Parse the options, then process vmlinux according to its ELF class. */
+int main(int argc, char **argv)
+{
+	int show_reloc_info = 0, as_text = 0, as_bin = 0, keep_relocs = 0;
+	const char *fname = NULL;
+	unsigned char e_ident[EI_NIDENT];
+	FILE *fp;
+	int i;
+
+	for (i = 1; i < argc; i++) {
+		char *arg = argv[i];
+
+		if (*arg == '-') {
+			if (strcmp(arg, "--reloc-info") == 0) {
+				show_reloc_info = 1;
+				continue;
+			}
+			if (strcmp(arg, "--text") == 0) {
+				as_text = 1;
+				continue;
+			}
+			if (strcmp(arg, "--bin") == 0) {
+				as_bin = 1;
+				continue;
+			}
+			if (strcmp(arg, "--keep") == 0) {
+				keep_relocs = 1;
+				continue;
+			}
+		} else if (!fname) {
+			fname = arg;
+			continue;
+		}
+		/* unknown option or a second file name */
+		usage();
+	}
+	if (!fname)
+		usage();
+
+	fp = fopen(fname, "r+");
+	if (!fp)
+		die("Cannot open %s: %s\n", fname, strerror(errno));
+
+	if (fread(e_ident, 1, EI_NIDENT, fp) != EI_NIDENT)
+		die("Cannot read %s: %s\n", fname, strerror(errno));
+
+	rewind(fp);
+	if (e_ident[EI_CLASS] == ELFCLASS64)
+		process_64(fp, as_text,  as_bin, show_reloc_info, keep_relocs);
+	else
+		process_32(fp, as_text, as_bin, show_reloc_info, keep_relocs);
+
+	/* The image may have been modified in place; catch failed flushes */
+	if (fclose(fp))
+		die("Cannot write %s: %s\n", fname, strerror(errno));
+	return 0;
+}
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 1882e64..23c2344 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -19,6 +19,7 @@
 #include <asm/octeon/cvmx-ipd-defs.h>
 #include <asm/octeon/cvmx-mio-defs.h>
 #include <asm/octeon/cvmx-rst-defs.h>
+#include <asm/octeon/cvmx-fpa-defs.h>
 
 static u64 f;
 static u64 rdiv;
@@ -65,9 +66,13 @@
  */
 void octeon_init_cvmcount(void)
 {
+	u64 clk_reg;
 	unsigned long flags;
 	unsigned loops = 2;
 
+	clk_reg = octeon_has_feature(OCTEON_FEATURE_FPA3) ?
+		CVMX_FPA_CLK_COUNT : CVMX_IPD_CLK_COUNT;
+
 	/* Clobber loops so GCC will not unroll the following while loop. */
 	asm("" : "+r" (loops));
 
@@ -77,18 +82,18 @@
 	 * which should give more deterministic timing.
 	 */
 	while (loops--) {
-		u64 ipd_clk_count = cvmx_read_csr(CVMX_IPD_CLK_COUNT);
+		u64 clk_count = cvmx_read_csr(clk_reg);
 		if (rdiv != 0) {
-			ipd_clk_count *= rdiv;
+			clk_count *= rdiv;
 			if (f != 0) {
 				asm("dmultu\t%[cnt],%[f]\n\t"
 				    "mfhi\t%[cnt]"
-				    : [cnt] "+r" (ipd_clk_count)
+				    : [cnt] "+r" (clk_count)
 				    : [f] "r" (f)
 				    : "hi", "lo");
 			}
 		}
-		write_c0_cvmcount(ipd_clk_count);
+		write_c0_cvmcount(clk_count);
 	}
 	local_irq_restore(flags);
 }
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c
index 376701f..ff26d02 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c
@@ -87,6 +87,8 @@
 		return 9;
 	if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN52XX))
 		return 4;
+	if (OCTEON_IS_MODEL(OCTEON_CN7XXX))
+		return 5;
 	else
 		return 3;
 }
@@ -260,6 +262,41 @@
 }
 
 /**
+ * @INTERNAL
+ * Return interface mode for CN7XXX.
+ */
+static cvmx_helper_interface_mode_t __cvmx_get_mode_cn7xxx(int interface)
+{
+	union cvmx_gmxx_inf_mode mode;
+
+	mode.u64 = cvmx_read_csr(CVMX_GMXX_INF_MODE(interface));
+
+	switch (interface) {
+	case 0:
+	case 1:
+		switch (mode.cn68xx.mode) {
+		case 0:
+			return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+		case 1:
+		case 2:
+			return CVMX_HELPER_INTERFACE_MODE_SGMII;
+		case 3:
+			return CVMX_HELPER_INTERFACE_MODE_XAUI;
+		default:
+			return CVMX_HELPER_INTERFACE_MODE_SGMII;
+		}
+	case 2:
+		return CVMX_HELPER_INTERFACE_MODE_NPI;
+	case 3:
+		return CVMX_HELPER_INTERFACE_MODE_LOOP;
+	case 4:
+		return CVMX_HELPER_INTERFACE_MODE_RGMII;
+	default:
+		return CVMX_HELPER_INTERFACE_MODE_DISABLED;
+	}
+}
+
+/**
  * Get the operating mode of an interface. Depending on the Octeon
  * chip and configuration, this function returns an enumeration
  * of the type of packet I/O supported by an interface.
@@ -278,6 +315,12 @@
 		return CVMX_HELPER_INTERFACE_MODE_DISABLED;
 
 	/*
+	 * OCTEON III models
+	 */
+	if (OCTEON_IS_MODEL(OCTEON_CN7XXX))
+		return __cvmx_get_mode_cn7xxx(interface);
+
+	/*
 	 * Octeon II models
 	 */
 	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) || OCTEON_IS_MODEL(OCTEON_CNF71XX))
diff --git a/arch/mips/cavium-octeon/executive/cvmx-sysinfo.c b/arch/mips/cavium-octeon/executive/cvmx-sysinfo.c
index 3d17fac..cc1b1d2 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-sysinfo.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-sysinfo.c
@@ -32,86 +32,22 @@
 #include <linux/module.h>
 
 #include <asm/octeon/cvmx.h>
-#include <asm/octeon/cvmx-spinlock.h>
 #include <asm/octeon/cvmx-sysinfo.h>
 
-/**
+/*
  * This structure defines the private state maintained by sysinfo module.
- *
  */
-static struct {
-	struct cvmx_sysinfo sysinfo;	   /* system information */
-	cvmx_spinlock_t lock;	   /* mutex spinlock */
-
-} state = {
-	.lock = CVMX_SPINLOCK_UNLOCKED_INITIALIZER
-};
-
+static struct cvmx_sysinfo sysinfo;	   /* system information */
 
 /*
- * Global variables that define the min/max of the memory region set
- * up for 32 bit userspace access.
- */
-uint64_t linux_mem32_min;
-uint64_t linux_mem32_max;
-uint64_t linux_mem32_wired;
-uint64_t linux_mem32_offset;
-
-/**
- * This function returns the application information as obtained
+ * Returns the application information as obtained
  * by the bootloader.  This provides the core mask of the cores
  * running the same application image, as well as the physical
  * memory regions available to the core.
- *
- * Returns  Pointer to the boot information structure
- *
  */
 struct cvmx_sysinfo *cvmx_sysinfo_get(void)
 {
-	return &(state.sysinfo);
+	return &sysinfo;
 }
 EXPORT_SYMBOL(cvmx_sysinfo_get);
 
-/**
- * This function is used in non-simple executive environments (such as
- * Linux kernel, u-boot, etc.)	to configure the minimal fields that
- * are required to use simple executive files directly.
- *
- * Locking (if required) must be handled outside of this
- * function
- *
- * @phy_mem_desc_ptr:
- *		     Pointer to global physical memory descriptor
- *		     (bootmem descriptor) @board_type: Octeon board
- *		     type enumeration
- *
- * @board_rev_major:
- *		     Board major revision
- * @board_rev_minor:
- *		     Board minor revision
- * @cpu_clock_hz:
- *		     CPU clock freqency in hertz
- *
- * Returns 0: Failure
- *	   1: success
- */
-int cvmx_sysinfo_minimal_initialize(void *phy_mem_desc_ptr,
-				    uint16_t board_type,
-				    uint8_t board_rev_major,
-				    uint8_t board_rev_minor,
-				    uint32_t cpu_clock_hz)
-{
-
-	/* The sysinfo structure was already initialized */
-	if (state.sysinfo.board_type)
-		return 0;
-
-	memset(&(state.sysinfo), 0x0, sizeof(state.sysinfo));
-	state.sysinfo.phy_mem_desc_ptr = phy_mem_desc_ptr;
-	state.sysinfo.board_type = board_type;
-	state.sysinfo.board_rev_major = board_rev_major;
-	state.sysinfo.board_rev_minor = board_rev_minor;
-	state.sysinfo.cpu_clock_hz = cpu_clock_hz;
-
-	return 1;
-}
diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c
index b2104bd..d08a2bc 100644
--- a/arch/mips/cavium-octeon/executive/octeon-model.c
+++ b/arch/mips/cavium-octeon/executive/octeon-model.c
@@ -71,11 +71,11 @@
 	uint32_t fuse_data = 0;
 
 	fus3.u64 = 0;
-	if (!OCTEON_IS_MODEL(OCTEON_CN6XXX))
+	if (OCTEON_IS_MODEL(OCTEON_CN3XXX) || OCTEON_IS_MODEL(OCTEON_CN5XXX))
 		fus3.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
 	fus_dat2.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT2);
 	fus_dat3.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT3);
-	num_cores = cvmx_pop(cvmx_read_csr(CVMX_CIU_FUSE));
+	num_cores = cvmx_octeon_num_cores();
 
 	/* Make sure the non existent devices look disabled */
 	switch ((chip_id >> 8) & 0xff) {
@@ -121,6 +121,15 @@
 	 * later.
 	 */
 	switch (num_cores) {
+	case 48:
+		core_model = "90";
+		break;
+	case 44:
+		core_model = "88";
+		break;
+	case 40:
+		core_model = "85";
+		break;
 	case 32:
 		core_model = "80";
 		break;
@@ -297,7 +306,7 @@
 				if (fus_dat3.s.nozip)
 					suffix = "SCP";
 
-				if (fus_dat3.s.bar2_en)
+				if (fus_dat3.cn56xx.bar2_en)
 					suffix = "NSPB2";
 			}
 			if (fus3.cn56xx.crip_1024k)
@@ -369,6 +378,73 @@
 		else
 			suffix = "AAP";
 		break;
+	case 0x94:		/* CNF71XX */
+		family = "F71";
+		if (fus_dat3.cnf71xx.nozip)
+			suffix = "SCP";
+		else
+			suffix = "AAP";
+		break;
+	case 0x95:		/* CN78XX */
+		if (num_cores == 6)	/* Other core counts match generic */
+			core_model = "35";
+		if (OCTEON_IS_MODEL(OCTEON_CN76XX))
+			family = "76";
+		else
+			family = "78";
+		if (fus_dat3.cn78xx.l2c_crip == 2)
+			family = "77";
+		if (fus_dat3.cn78xx.nozip
+		    && fus_dat3.cn78xx.nodfa_dte
+		    && fus_dat3.cn78xx.nohna_dte) {
+			if (fus_dat3.cn78xx.nozip &&
+				!fus_dat2.cn78xx.raid_en &&
+				fus_dat3.cn78xx.nohna_dte) {
+				suffix = "CP";
+			} else {
+				suffix = "SCP";
+			}
+		} else if (fus_dat2.cn78xx.raid_en == 0)
+			suffix = "HCP";
+		else
+			suffix = "AAP";
+		break;
+	case 0x96:		/* CN70XX */
+		family = "70";
+		if (cvmx_read_csr(CVMX_MIO_FUS_PDF) & (0x1ULL << 32))
+			family = "71";
+		if (fus_dat2.cn70xx.nocrypto)
+			suffix = "CP";
+		else if (fus_dat3.cn70xx.nodfa_dte)
+			suffix = "SCP";
+		else
+			suffix = "AAP";
+		break;
+	case 0x97:		/* CN73XX */
+		if (num_cores == 6)	/* Other core counts match generic */
+			core_model = "35";
+		family = "73";
+		if (fus_dat3.cn73xx.l2c_crip == 2)
+			family = "72";
+		if (fus_dat3.cn73xx.nozip
+				&& fus_dat3.cn73xx.nodfa_dte
+				&& fus_dat3.cn73xx.nohna_dte) {
+			if (!fus_dat2.cn73xx.raid_en)
+				suffix = "CP";
+			else
+				suffix = "SCP";
+		} else
+			suffix = "AAP";
+		break;
+	case 0x98:		/* CN75XX */
+		family = "F75";
+		if (fus_dat3.cn78xx.nozip
+		    && fus_dat3.cn78xx.nodfa_dte
+		    && fus_dat3.cn78xx.nohna_dte)
+			suffix = "SCP";
+		else
+			suffix = "AAP";
+		break;
 	default:
 		family = "XX";
 		core_model = "XX";
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 4f9eb05..368eb49 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2014 Cavium, Inc.
+ * Copyright (C) 2004-2016 Cavium, Inc.
  */
 
 #include <linux/of_address.h>
@@ -19,16 +19,53 @@
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-ciu2-defs.h>
+#include <asm/octeon/cvmx-ciu3-defs.h>
 
 static DEFINE_PER_CPU(unsigned long, octeon_irq_ciu0_en_mirror);
 static DEFINE_PER_CPU(unsigned long, octeon_irq_ciu1_en_mirror);
 static DEFINE_PER_CPU(raw_spinlock_t, octeon_irq_ciu_spinlock);
+static DEFINE_PER_CPU(unsigned int, octeon_irq_ciu3_idt_ip2);
+
+static DEFINE_PER_CPU(unsigned int, octeon_irq_ciu3_idt_ip3);
+static DEFINE_PER_CPU(struct octeon_ciu3_info *, octeon_ciu3_info);
+#define CIU3_MBOX_PER_CORE 10
+
+/*
+ * The 8 most significant bits of the intsn identify the interrupt major block.
+ * Each major block might use its own interrupt domain. Thus 256 domains are
+ * needed.
+ */
+#define MAX_CIU3_DOMAINS		256
+
+typedef irq_hw_number_t (*octeon_ciu3_intsn2hw_t)(struct irq_domain *, unsigned int);
+
+/* Information for each ciu3 in the system */
+struct octeon_ciu3_info {
+	u64			ciu3_addr;
+	int			node;
+	struct irq_domain	*domain[MAX_CIU3_DOMAINS];
+	octeon_ciu3_intsn2hw_t	intsn2hw[MAX_CIU3_DOMAINS];
+};
+
+/* Each ciu3 in the system uses its own data (one ciu3 per node) */
+static struct octeon_ciu3_info	*octeon_ciu3_info_per_node[4];
 
 struct octeon_irq_ciu_domain_data {
 	int num_sum;  /* number of sum registers (2 or 3). */
 };
 
-static __read_mostly u8 octeon_irq_ciu_to_irq[8][64];
+/* Register offsets from ciu3_addr */
+#define CIU3_CONST		0x220
+#define CIU3_IDT_CTL(_idt)	((_idt) * 8 + 0x110000)
+#define CIU3_IDT_PP(_idt, _idx)	((_idt) * 32 + (_idx) * 8 + 0x120000)
+#define CIU3_IDT_IO(_idt)	((_idt) * 8 + 0x130000)
+#define CIU3_DEST_PP_INT(_pp_ip) ((_pp_ip) * 8 + 0x200000)
+#define CIU3_DEST_IO_INT(_io)	((_io) * 8 + 0x210000)
+#define CIU3_ISC_CTL(_intsn)	((_intsn) * 8 + 0x80000000)
+#define CIU3_ISC_W1C(_intsn)	((_intsn) * 8 + 0x90000000)
+#define CIU3_ISC_W1S(_intsn)	((_intsn) * 8 + 0xa0000000)
+
+static __read_mostly int octeon_irq_ciu_to_irq[8][64];
 
 struct octeon_ciu_chip_data {
 	union {
@@ -39,10 +76,11 @@
 		struct {		/* only used for ciu/ciu2 */
 			u8 line;
 			u8 bit;
-			u8 gpio_line;
 		};
 	};
+	int gpio_line;
 	int current_cpu;	/* Next CPU expected to take this irq */
+	int ciu_node; /* NUMA node number of the CIU */
 };
 
 struct octeon_core_chip_data {
@@ -626,6 +664,18 @@
 	}
 }
 
+static int octeon_irq_ciu_set_type(struct irq_data *data, unsigned int t)
+{
+	/* Any edge trigger gets the edge flow handler, all else level. */
+	bool edge_triggered = t & IRQ_TYPE_EDGE_BOTH;
+
+	irqd_set_trigger_type(data, t);
+	irq_set_handler_locked(data, edge_triggered ? handle_edge_irq :
+						      handle_level_irq);
+
+	return IRQ_SET_MASK_OK;
+}
+
 static void octeon_irq_gpio_setup(struct irq_data *data)
 {
 	union cvmx_gpio_bit_cfgx cfg;
@@ -663,7 +713,7 @@
 	irqd_set_trigger_type(data, t);
 	octeon_irq_gpio_setup(data);
 
-	if (irqd_get_trigger_type(data) & IRQ_TYPE_EDGE_BOTH)
+	if (t & IRQ_TYPE_EDGE_BOTH)
 		irq_set_handler_locked(data, handle_edge_irq);
 	else
 		irq_set_handler_locked(data, handle_level_irq);
@@ -863,6 +913,16 @@
 }
 #endif
 
+static unsigned int edge_startup(struct irq_data *data)
+{
+	/* Ack any edge that is already pending, then enable, so there is
+	 * a fresh _edge_ to fire on when the event reappears.  Returns 0.
+	 */
+	data->chip->irq_ack(data);
+	data->chip->irq_enable(data);
+	return 0;
+}
+
 /*
  * Newer octeon chips have support for lockless CIU operation.
  */
@@ -1158,16 +1218,6 @@
 static struct irq_chip *octeon_irq_ciu_chip_edge;
 static struct irq_chip *octeon_irq_gpio_chip;
 
-static bool octeon_irq_virq_in_range(unsigned int virq)
-{
-	/* We cannot let it overflow the mapping array. */
-	if (virq < (1ul << 8 * sizeof(octeon_irq_ciu_to_irq[0][0])))
-		return true;
-
-	WARN_ONCE(true, "virq out of range %u.\n", virq);
-	return false;
-}
-
 static int octeon_irq_ciu_map(struct irq_domain *d,
 			      unsigned int virq, irq_hw_number_t hw)
 {
@@ -1176,13 +1226,6 @@
 	unsigned int bit = hw & 63;
 	struct octeon_irq_ciu_domain_data *dd = d->host_data;
 
-	if (!octeon_irq_virq_in_range(virq))
-		return -EINVAL;
-
-	/* Don't map irq if it is reserved for GPIO. */
-	if (line == 0 && bit >= 16 && bit <32)
-		return 0;
-
 	if (line >= dd->num_sum || octeon_irq_ciu_to_irq[line][bit] != 0)
 		return -EINVAL;
 
@@ -1215,9 +1258,6 @@
 	unsigned int line, bit;
 	int r;
 
-	if (!octeon_irq_virq_in_range(virq))
-		return -EINVAL;
-
 	line = (hw + gpiod->base_hwirq) >> 6;
 	bit = (hw + gpiod->base_hwirq) & 63;
 	if (line > ARRAY_SIZE(octeon_irq_ciu_to_irq) ||
@@ -1899,9 +1939,6 @@
 	unsigned int line = hw >> 6;
 	unsigned int bit = hw & 63;
 
-	if (!octeon_irq_virq_in_range(virq))
-		return -EINVAL;
-
 	/*
 	 * Don't map irq if it is reserved for GPIO.
 	 * (Line 7 are the GPIO lines.)
@@ -2294,10 +2331,598 @@
 	return 0;
 }
 
+int octeon_irq_ciu3_xlat(struct irq_domain *d,
+			 struct device_node *node,
+			 const u32 *intspec,
+			 unsigned int intsize,
+			 unsigned long *out_hwirq,
+			 unsigned int *out_type)
+{
+	struct octeon_ciu3_info *ciu3_info = d->host_data;
+	unsigned int hwirq, type, intsn_major;
+	union cvmx_ciu3_iscx_ctl isc_ctl;
+
+	/* The specifier must carry at least <intsn, trigger type>. */
+	if (intsize < 2)
+		return -EINVAL;
+	hwirq = intspec[0];
+	type = intspec[1];
+
+	if (hwirq >= (1 << 20))
+		return -EINVAL;
+
+	/*
+	 * Major block 0x04 holds the software sources, which are handled
+	 * separately and are never translated here.
+	 */
+	intsn_major = hwirq >> 12;
+	if (intsn_major == 0x04)
+		return -EINVAL;
+
+	isc_ctl.u64 = cvmx_read_csr(ciu3_info->ciu3_addr + CIU3_ISC_CTL(hwirq));
+	if (!isc_ctl.s.imp)
+		return -EINVAL;
+
+	switch (type) {
+	case 4: /* official value for level triggering. */
+		*out_type = IRQ_TYPE_LEVEL_HIGH;
+		break;
+	case 0: /* unofficial value, but we might as well let it work. */
+	case 1: /* official value for edge triggering. */
+		*out_type = IRQ_TYPE_EDGE_RISING;
+		break;
+	default: /* Nothing else is acceptable. */
+		return -EINVAL;
+	}
+
+	*out_hwirq = hwirq;
+	return 0;
+}
+
+void octeon_irq_ciu3_enable(struct irq_data *data)
+{
+	int cpu;
+	union cvmx_ciu3_iscx_ctl isc_ctl;
+	union cvmx_ciu3_iscx_w1c isc_w1c;
+	u64 isc_ctl_addr;
+
+	struct octeon_ciu_chip_data *cd;
+
+	cpu = next_cpu_for_irq(data);
+
+	cd = irq_data_get_irq_chip_data(data);
+	/* First clear the enable bit through the ISC_W1C register. */
+	isc_w1c.u64 = 0;
+	isc_w1c.s.en = 1;
+	cvmx_write_csr(cd->ciu3_addr + CIU3_ISC_W1C(cd->intsn), isc_w1c.u64);
+	/* Then rewrite ISC_CTL: enabled, IDT from the chosen CPU's ip2 slot. */
+	isc_ctl_addr = cd->ciu3_addr + CIU3_ISC_CTL(cd->intsn);
+	isc_ctl.u64 = 0;
+	isc_ctl.s.en = 1;
+	isc_ctl.s.idt = per_cpu(octeon_irq_ciu3_idt_ip2, cpu);
+	cvmx_write_csr(isc_ctl_addr, isc_ctl.u64);
+	cvmx_read_csr(isc_ctl_addr);
+}
+
+void octeon_irq_ciu3_disable(struct irq_data *data)
+{
+	u64 isc_ctl_addr;
+	union cvmx_ciu3_iscx_w1c isc_w1c;
+
+	struct octeon_ciu_chip_data *cd;
+
+	cd = irq_data_get_irq_chip_data(data);
+	/* W1C value with only the enable bit set. */
+	isc_w1c.u64 = 0;
+	isc_w1c.s.en = 1;
+	/* Clear the enable bit, then zero the whole ISC_CTL register. */
+	isc_ctl_addr = cd->ciu3_addr + CIU3_ISC_CTL(cd->intsn);
+	cvmx_write_csr(cd->ciu3_addr + CIU3_ISC_W1C(cd->intsn), isc_w1c.u64);
+	cvmx_write_csr(isc_ctl_addr, 0);
+	cvmx_read_csr(isc_ctl_addr);
+}
+
+void octeon_irq_ciu3_ack(struct irq_data *data)
+{
+	u64 isc_w1c_addr;
+	union cvmx_ciu3_iscx_w1c isc_w1c;
+	struct octeon_ciu_chip_data *cd;
+	u32 trigger_type = irqd_get_trigger_type(data);
+
+	/*
+	 * A single irq_chip serves both trigger types, so there is
+	 * nothing to do here to ack a level interrupt.
+	 */
+	if (!(trigger_type & IRQ_TYPE_EDGE_BOTH))
+		return;
+
+	cd = irq_data_get_irq_chip_data(data);
+	/* Edge interrupt: write the raw bit to the ISC_W1C register. */
+	isc_w1c.u64 = 0;
+	isc_w1c.s.raw = 1;
+
+	isc_w1c_addr = cd->ciu3_addr + CIU3_ISC_W1C(cd->intsn);
+	cvmx_write_csr(isc_w1c_addr, isc_w1c.u64);
+	cvmx_read_csr(isc_w1c_addr);
+}
+
+void octeon_irq_ciu3_mask(struct irq_data *data)
+{
+	union cvmx_ciu3_iscx_w1c isc_w1c;
+	u64 isc_w1c_addr;
+	struct octeon_ciu_chip_data *cd;
+
+	cd = irq_data_get_irq_chip_data(data);
+	/* Mask by writing only the enable bit to the ISC_W1C register. */
+	isc_w1c.u64 = 0;
+	isc_w1c.s.en = 1;
+
+	isc_w1c_addr = cd->ciu3_addr + CIU3_ISC_W1C(cd->intsn);
+	cvmx_write_csr(isc_w1c_addr, isc_w1c.u64);
+	cvmx_read_csr(isc_w1c_addr);
+}
+
+void octeon_irq_ciu3_mask_ack(struct irq_data *data)
+{
+	union cvmx_ciu3_iscx_w1c isc_w1c;
+	u64 isc_w1c_addr;
+	struct octeon_ciu_chip_data *cd;
+	u32 trigger_type = irqd_get_trigger_type(data);
+
+	cd = irq_data_get_irq_chip_data(data);
+	/* The enable bit is always written, which masks the source. */
+	isc_w1c.u64 = 0;
+	isc_w1c.s.en = 1;
+
+	/*
+	 * We use a single irq_chip, so only ack an edge (!level)
+	 * interrupt.
+	 */
+	if (trigger_type & IRQ_TYPE_EDGE_BOTH)
+		isc_w1c.s.raw = 1;
+	/* One ISC_W1C write carries the mask and, for edges, the ack. */
+	isc_w1c_addr = cd->ciu3_addr + CIU3_ISC_W1C(cd->intsn);
+	cvmx_write_csr(isc_w1c_addr, isc_w1c.u64);
+	cvmx_read_csr(isc_w1c_addr);
+}
+
+#ifdef CONFIG_SMP
+int octeon_irq_ciu3_set_affinity(struct irq_data *data,
+				 const struct cpumask *dest, bool force)
+{
+	union cvmx_ciu3_iscx_ctl isc_ctl;
+	union cvmx_ciu3_iscx_w1c isc_w1c;
+	u64 isc_ctl_addr;
+	int cpu;
+	bool enable_one = !irqd_irq_disabled(data) && !irqd_irq_masked(data);
+	struct octeon_ciu_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	if (!cpumask_subset(dest, cpumask_of_node(cd->ciu_node)))
+		return -EINVAL;
+	/* Nothing to reprogram while the irq is disabled or masked. */
+	if (!enable_one)
+		return IRQ_SET_MASK_OK;
+
+	/* Target the first CPU in the mask; fall back to the current CPU. */
+	cpu = cpumask_first(dest);
+	if (cpu >= nr_cpu_ids)
+		cpu = smp_processor_id();
+	cd->current_cpu = cpu;
+	/* Clear enable via ISC_W1C, then re-enable routed to the new CPU. */
+	isc_w1c.u64 = 0;
+	isc_w1c.s.en = 1;
+	cvmx_write_csr(cd->ciu3_addr + CIU3_ISC_W1C(cd->intsn), isc_w1c.u64);
+
+	isc_ctl_addr = cd->ciu3_addr + CIU3_ISC_CTL(cd->intsn);
+	isc_ctl.u64 = 0;
+	isc_ctl.s.en = 1;
+	isc_ctl.s.idt = per_cpu(octeon_irq_ciu3_idt_ip2, cpu);
+	cvmx_write_csr(isc_ctl_addr, isc_ctl.u64);
+	cvmx_read_csr(isc_ctl_addr);
+
+	return IRQ_SET_MASK_OK;
+}
+#endif
+
+static struct irq_chip octeon_irq_chip_ciu3 = {
+	.name = "CIU3",
+	.irq_startup = edge_startup,
+	.irq_enable = octeon_irq_ciu3_enable,
+	.irq_disable = octeon_irq_ciu3_disable,
+	.irq_ack = octeon_irq_ciu3_ack,
+	.irq_mask = octeon_irq_ciu3_mask,
+	.irq_mask_ack = octeon_irq_ciu3_mask_ack,
+	.irq_unmask = octeon_irq_ciu3_enable,
+	.irq_set_type = octeon_irq_ciu_set_type,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu3_set_affinity,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+int octeon_irq_ciu3_mapx(struct irq_domain *d, unsigned int virq,
+			 irq_hw_number_t hw, struct irq_chip *chip)
+{
+	struct octeon_ciu3_info *ciu3_info = d->host_data;
+	struct octeon_ciu_chip_data *cd = kzalloc_node(sizeof(*cd), GFP_KERNEL,
+						       ciu3_info->node);
+	if (!cd)
+		return -ENOMEM;
+	cd->intsn = hw;
+	cd->current_cpu = -1;
+	cd->ciu3_addr = ciu3_info->ciu3_addr;
+	cd->ciu_node = ciu3_info->node;
+	irq_set_chip_and_handler(virq, chip, handle_edge_irq);
+	irq_set_chip_data(virq, cd);
+
+	return 0;
+}
+
+static int octeon_irq_ciu3_map(struct irq_domain *d,
+			       unsigned int virq, irq_hw_number_t hw)
+{
+	return octeon_irq_ciu3_mapx(d, virq, hw, &octeon_irq_chip_ciu3);
+}
+
+static struct irq_domain_ops octeon_dflt_domain_ciu3_ops = {
+	.map = octeon_irq_ciu3_map,
+	.unmap = octeon_irq_free_cd,
+	.xlate = octeon_irq_ciu3_xlat,
+};
+
+static void octeon_irq_ciu3_ip2(void)
+{
+	union cvmx_ciu3_destx_pp_int dest_pp_int;
+	struct octeon_ciu3_info *ciu3_info;
+	u64 ciu3_addr;
+
+	ciu3_info = __this_cpu_read(octeon_ciu3_info);
+	ciu3_addr = ciu3_info->ciu3_addr;
+	/* Read the DEST_PP_INT entry at index 3 * core for the local core. */
+	dest_pp_int.u64 = cvmx_read_csr(ciu3_addr + CIU3_DEST_PP_INT(3 * cvmx_get_local_core_num()));
+
+	if (likely(dest_pp_int.s.intr)) {
+		irq_hw_number_t intsn = dest_pp_int.s.intsn;
+		irq_hw_number_t hw;
+		struct irq_domain *domain;
+		/* Get the domain to use from the major block */
+		int block = intsn >> 12;
+		int ret;
+
+		domain = ciu3_info->domain[block];
+		if (ciu3_info->intsn2hw[block])
+			hw = ciu3_info->intsn2hw[block](domain, intsn);
+		else
+			hw = intsn;
+		/* On dispatch failure, disable the source (W1C en). */
+		ret = handle_domain_irq(domain, hw, NULL);
+		if (ret < 0) {
+			union cvmx_ciu3_iscx_w1c isc_w1c;
+			u64 isc_w1c_addr = ciu3_addr + CIU3_ISC_W1C(intsn);
+
+			isc_w1c.u64 = 0;
+			isc_w1c.s.en = 1;
+			cvmx_write_csr(isc_w1c_addr, isc_w1c.u64);
+			cvmx_read_csr(isc_w1c_addr);
+			spurious_interrupt();
+		}
+	} else {
+		spurious_interrupt();
+	}
+}
+
+/*
+ * 10 mbox per core starting from zero.
+ * Base mbox is core * 10
+ */
+static unsigned int octeon_irq_ciu3_base_mbox_intsn(int core)
+{
+	/* SW (mbox) are 0x04 in bits 12..19 */
+	return 0x04000 + CIU3_MBOX_PER_CORE * core;
+}
+
+static unsigned int octeon_irq_ciu3_mbox_intsn_for_core(int core, unsigned int mbox)
+{
+	return octeon_irq_ciu3_base_mbox_intsn(core) + mbox;
+}
+
+static unsigned int octeon_irq_ciu3_mbox_intsn_for_cpu(int cpu, unsigned int mbox)
+{
+	int local_core = octeon_coreid_for_cpu(cpu) & 0x3f;
+
+	return octeon_irq_ciu3_mbox_intsn_for_core(local_core, mbox);
+}
+
+static void octeon_irq_ciu3_mbox(void)
+{
+	union cvmx_ciu3_destx_pp_int dest_pp_int;
+	struct octeon_ciu3_info *ciu3_info;
+	u64 ciu3_addr;
+	int core = cvmx_get_local_core_num();
+
+	ciu3_info = __this_cpu_read(octeon_ciu3_info);
+	ciu3_addr = ciu3_info->ciu3_addr;
+	/* Read the DEST_PP_INT entry at index 1 + 3 * core. */
+	dest_pp_int.u64 = cvmx_read_csr(ciu3_addr + CIU3_DEST_PP_INT(1 + 3 * core));
+	/* If intr is set, map the intsn back to this core's mailbox number. */
+	if (likely(dest_pp_int.s.intr)) {
+		irq_hw_number_t intsn = dest_pp_int.s.intsn;
+		int mbox = intsn - octeon_irq_ciu3_base_mbox_intsn(core);
+
+		if (likely(mbox >= 0 && mbox < CIU3_MBOX_PER_CORE)) {
+			do_IRQ(mbox + OCTEON_IRQ_MBOX0);
+		} else {
+			union cvmx_ciu3_iscx_w1c isc_w1c;
+			u64 isc_w1c_addr = ciu3_addr + CIU3_ISC_W1C(intsn);
+
+			isc_w1c.u64 = 0;
+			isc_w1c.s.en = 1;
+			cvmx_write_csr(isc_w1c_addr, isc_w1c.u64);
+			cvmx_read_csr(isc_w1c_addr);
+			spurious_interrupt();
+		}
+	} else {
+		spurious_interrupt();
+	}
+}
+
+void octeon_ciu3_mbox_send(int cpu, unsigned int mbox)
+{
+	struct octeon_ciu3_info *ciu3_info;
+	unsigned int intsn;
+	union cvmx_ciu3_iscx_w1s isc_w1s;
+	u64 isc_w1s_addr;
+
+	if (WARN_ON_ONCE(mbox >= CIU3_MBOX_PER_CORE))
+		return;
+	/* Locate the target CPU's mailbox source in that CPU's CIU3. */
+	intsn = octeon_irq_ciu3_mbox_intsn_for_cpu(cpu, mbox);
+	ciu3_info = per_cpu(octeon_ciu3_info, cpu);
+	isc_w1s_addr = ciu3_info->ciu3_addr + CIU3_ISC_W1S(intsn);
+	/* Raise it by writing the raw bit to the ISC_W1S register. */
+	isc_w1s.u64 = 0;
+	isc_w1s.s.raw = 1;
+
+	cvmx_write_csr(isc_w1s_addr, isc_w1s.u64);
+	cvmx_read_csr(isc_w1s_addr);
+}
+
+static void octeon_irq_ciu3_mbox_set_enable(struct irq_data *data, int cpu, bool en)
+{
+	struct octeon_ciu3_info *ciu3_info;
+	unsigned int intsn;
+	u64 isc_ctl_addr, isc_w1c_addr;
+	union cvmx_ciu3_iscx_ctl isc_ctl;
+	unsigned int mbox = data->irq - OCTEON_IRQ_MBOX0;
+
+	intsn = octeon_irq_ciu3_mbox_intsn_for_cpu(cpu, mbox);
+	ciu3_info = per_cpu(octeon_ciu3_info, cpu);
+	isc_w1c_addr = ciu3_info->ciu3_addr + CIU3_ISC_W1C(intsn);
+	isc_ctl_addr = ciu3_info->ciu3_addr + CIU3_ISC_CTL(intsn);
+	/* NOTE(review): the ISC_CTL union is reused for the W1C write below. */
+	isc_ctl.u64 = 0;
+	isc_ctl.s.en = 1;
+	/* Always disable; then re-enable via the CPU's ip3 IDT if asked. */
+	cvmx_write_csr(isc_w1c_addr, isc_ctl.u64);
+	cvmx_write_csr(isc_ctl_addr, 0);
+	if (en) {
+		unsigned int idt = per_cpu(octeon_irq_ciu3_idt_ip3, cpu);
+
+		isc_ctl.u64 = 0;
+		isc_ctl.s.en = 1;
+		isc_ctl.s.idt = idt;
+		cvmx_write_csr(isc_ctl_addr, isc_ctl.u64);
+	}
+	cvmx_read_csr(isc_ctl_addr);
+}
+
+static void octeon_irq_ciu3_mbox_enable(struct irq_data *data)
+{
+	int cpu;
+	unsigned int mbox = data->irq - OCTEON_IRQ_MBOX0;
+
+	WARN_ON(mbox >= CIU3_MBOX_PER_CORE);
+
+	for_each_online_cpu(cpu)
+		octeon_irq_ciu3_mbox_set_enable(data, cpu, true);
+}
+
+static void octeon_irq_ciu3_mbox_disable(struct irq_data *data)
+{
+	int cpu;
+	unsigned int mbox = data->irq - OCTEON_IRQ_MBOX0;
+
+	WARN_ON(mbox >= CIU3_MBOX_PER_CORE);
+
+	for_each_online_cpu(cpu)
+		octeon_irq_ciu3_mbox_set_enable(data, cpu, false);
+}
+
+static void octeon_irq_ciu3_mbox_ack(struct irq_data *data)
+{
+	struct octeon_ciu3_info *ciu3_info;
+	unsigned int intsn;
+	u64 isc_w1c_addr;
+	union cvmx_ciu3_iscx_w1c isc_w1c;
+	unsigned int mbox = data->irq - OCTEON_IRQ_MBOX0;
+
+	intsn = octeon_irq_ciu3_mbox_intsn_for_core(cvmx_get_local_core_num(), mbox);
+
+	isc_w1c.u64 = 0;
+	isc_w1c.s.raw = 1;
+
+	ciu3_info = __this_cpu_read(octeon_ciu3_info);
+	isc_w1c_addr = ciu3_info->ciu3_addr + CIU3_ISC_W1C(intsn);
+	cvmx_write_csr(isc_w1c_addr, isc_w1c.u64);
+	cvmx_read_csr(isc_w1c_addr);
+}
+
+static void octeon_irq_ciu3_mbox_cpu_online(struct irq_data *data)
+{
+	octeon_irq_ciu3_mbox_set_enable(data, smp_processor_id(), true);
+}
+
+static void octeon_irq_ciu3_mbox_cpu_offline(struct irq_data *data)
+{
+	octeon_irq_ciu3_mbox_set_enable(data, smp_processor_id(), false);
+}
+
+static int octeon_irq_ciu3_alloc_resources(struct octeon_ciu3_info *ciu3_info)
+{
+	u64 b = ciu3_info->ciu3_addr;
+	int idt_ip2, idt_ip3, idt_ip4;
+	int unused_idt2;
+	int core = cvmx_get_local_core_num();
+	int i;
+
+	__this_cpu_write(octeon_ciu3_info, ciu3_info);
+
+	/*
+	 * 4 idt per core starting from 1 because zero is reserved.
+	 * Base idt per core is 4 * core + 1
+	 */
+	idt_ip2 = core * 4 + 1;
+	idt_ip3 = core * 4 + 2;
+	idt_ip4 = core * 4 + 3;
+	unused_idt2 = core * 4 + 4;
+	__this_cpu_write(octeon_irq_ciu3_idt_ip2, idt_ip2);
+	__this_cpu_write(octeon_irq_ciu3_idt_ip3, idt_ip3);
+
+	/* ip2 interrupts for this CPU */
+	cvmx_write_csr(b + CIU3_IDT_CTL(idt_ip2), 0);
+	cvmx_write_csr(b + CIU3_IDT_PP(idt_ip2, 0), 1ull << core);
+	cvmx_write_csr(b + CIU3_IDT_IO(idt_ip2), 0);
+
+	/* ip3 interrupts for this CPU */
+	cvmx_write_csr(b + CIU3_IDT_CTL(idt_ip3), 1);
+	cvmx_write_csr(b + CIU3_IDT_PP(idt_ip3, 0), 1ull << core);
+	cvmx_write_csr(b + CIU3_IDT_IO(idt_ip3), 0);
+
+	/* ip4 interrupts for this CPU */
+	cvmx_write_csr(b + CIU3_IDT_CTL(idt_ip4), 2);
+	cvmx_write_csr(b + CIU3_IDT_PP(idt_ip4, 0), 0);
+	cvmx_write_csr(b + CIU3_IDT_IO(idt_ip4), 0);
+	/* The fourth IDT of this core is unused; zero all of its registers. */
+	cvmx_write_csr(b + CIU3_IDT_CTL(unused_idt2), 0);
+	cvmx_write_csr(b + CIU3_IDT_PP(unused_idt2, 0), 0);
+	cvmx_write_csr(b + CIU3_IDT_IO(unused_idt2), 0);
+	/* Reset every mailbox source of this core (W1C <- 2, ISC_CTL <- 0). */
+	for (i = 0; i < CIU3_MBOX_PER_CORE; i++) {
+		unsigned int intsn = octeon_irq_ciu3_mbox_intsn_for_core(core, i);
+
+		cvmx_write_csr(b + CIU3_ISC_W1C(intsn), 2);
+		cvmx_write_csr(b + CIU3_ISC_CTL(intsn), 0);
+	}
+
+	return 0;
+}
+
+static void octeon_irq_setup_secondary_ciu3(void)
+{
+	struct octeon_ciu3_info *ciu3_info;
+
+	ciu3_info = octeon_ciu3_info_per_node[cvmx_get_node_num()];
+	octeon_irq_ciu3_alloc_resources(ciu3_info);
+	irq_cpu_online();
+
+	/* Enable the CIU lines */
+	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
+	if (octeon_irq_use_ip4)
+		set_c0_status(STATUSF_IP4);
+	else
+		clear_c0_status(STATUSF_IP4);
+}
+
+static struct irq_chip octeon_irq_chip_ciu3_mbox = {
+	.name = "CIU3-M",
+	.irq_enable = octeon_irq_ciu3_mbox_enable,
+	.irq_disable = octeon_irq_ciu3_mbox_disable,
+	.irq_ack = octeon_irq_ciu3_mbox_ack,
+
+	.irq_cpu_online = octeon_irq_ciu3_mbox_cpu_online,
+	.irq_cpu_offline = octeon_irq_ciu3_mbox_cpu_offline,
+	.flags = IRQCHIP_ONOFFLINE_ENABLED,
+};
+
+static int __init octeon_irq_init_ciu3(struct device_node *ciu_node,
+				       struct device_node *parent)
+{
+	int i;
+	int node;
+	struct irq_domain *domain;
+	struct octeon_ciu3_info *ciu3_info;
+	const __be32 *zero_addr;
+	u64 base_addr;
+	union cvmx_ciu3_const consts;
+
+	node = 0; /* of_node_to_nid(ciu_node); */
+	zero_addr = of_get_address(ciu_node, 0, NULL, NULL);
+	if (WARN_ON(!zero_addr))
+		return -EINVAL;
+
+	/* Allocate only after the lookup, so the error return cannot leak. */
+	ciu3_info = kzalloc_node(sizeof(*ciu3_info), GFP_KERNEL, node);
+	if (!ciu3_info)
+		return -ENOMEM;
+
+	base_addr = of_translate_address(ciu_node, zero_addr);
+	base_addr = (u64)phys_to_virt(base_addr);
+
+	ciu3_info->ciu3_addr = base_addr;
+	ciu3_info->node = node;
+	/* NOTE(review): consts is read here but not used afterwards. */
+	consts.u64 = cvmx_read_csr(base_addr + CIU3_CONST);
+
+	octeon_irq_setup_secondary = octeon_irq_setup_secondary_ciu3;
+
+	octeon_irq_ip2 = octeon_irq_ciu3_ip2;
+	octeon_irq_ip3 = octeon_irq_ciu3_mbox;
+	octeon_irq_ip4 = octeon_irq_ip4_mask;
+
+	if (node == cvmx_get_node_num()) {
+		/* Mips internal */
+		octeon_irq_init_core();
+
+		/* Only do per CPU things if it is the CIU of the boot node. */
+		i = irq_alloc_descs_from(OCTEON_IRQ_MBOX0, 8, node);
+		WARN_ON(i < 0);
+
+		for (i = 0; i < 8; i++)
+			irq_set_chip_and_handler(i + OCTEON_IRQ_MBOX0,
+						 &octeon_irq_chip_ciu3_mbox, handle_percpu_irq);
+	}
+
+	/*
+	 * Initialize all domains to use the default domain. Specific major
+	 * blocks will overwrite the default domain as needed.
+	 */
+	domain = irq_domain_add_tree(ciu_node, &octeon_dflt_domain_ciu3_ops,
+				     ciu3_info);
+	for (i = 0; i < MAX_CIU3_DOMAINS; i++)
+		ciu3_info->domain[i] = domain;
+
+	octeon_ciu3_info_per_node[node] = ciu3_info;
+
+	if (node == cvmx_get_node_num()) {
+		/* Only do per CPU things if it is the CIU of the boot node. */
+		octeon_irq_ciu3_alloc_resources(ciu3_info);
+		if (node == 0)
+			irq_set_default_host(domain);
+
+		octeon_irq_use_ip4 = false;
+		/* Enable the CIU lines */
+		set_c0_status(STATUSF_IP2 | STATUSF_IP3);
+		clear_c0_status(STATUSF_IP4);
+	}
+
+	return 0;
+}
+
 static struct of_device_id ciu_types[] __initdata = {
 	{.compatible = "cavium,octeon-3860-ciu", .data = octeon_irq_init_ciu},
 	{.compatible = "cavium,octeon-3860-gpio", .data = octeon_irq_init_gpio},
 	{.compatible = "cavium,octeon-6880-ciu2", .data = octeon_irq_init_ciu2},
+	{.compatible = "cavium,octeon-7890-ciu3", .data = octeon_irq_init_ciu3},
 	{.compatible = "cavium,octeon-7130-cib", .data = octeon_irq_init_cib},
 	{}
 };
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index d113c8d..7aeafed 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -13,6 +13,7 @@
 #include <linux/i2c.h>
 #include <linux/usb.h>
 #include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -525,10 +526,17 @@
 
 static void __init octeon_fdt_set_mac_addr(int n, u64 *pmac)
 {
+	const u8 *old_mac;
+	int old_len;
 	u8 new_mac[6];
 	u64 mac = *pmac;
 	int r;
 
+	old_mac = fdt_getprop(initial_boot_params, n, "local-mac-address",
+			      &old_len);
+	if (!old_mac || old_len != 6 || is_valid_ether_addr(old_mac))
+		return;
+
 	new_mac[0] = (mac >> 40) & 0xff;
 	new_mac[1] = (mac >> 32) & 0xff;
 	new_mac[2] = (mac >> 24) & 0xff;
@@ -560,7 +568,7 @@
 	fdt_nop_node(initial_boot_params, node);
 }
 
-static void __init octeon_fdt_pip_port(int iface, int i, int p, int max, u64 *pmac)
+static void __init octeon_fdt_pip_port(int iface, int i, int p, int max)
 {
 	char name_buffer[20];
 	int eth;
@@ -583,10 +591,9 @@
 
 	phy_addr = cvmx_helper_board_get_mii_address(ipd_port);
 	octeon_fdt_set_phy(eth, phy_addr);
-	octeon_fdt_set_mac_addr(eth, pmac);
 }
 
-static void __init octeon_fdt_pip_iface(int pip, int idx, u64 *pmac)
+static void __init octeon_fdt_pip_iface(int pip, int idx)
 {
 	char name_buffer[20];
 	int iface;
@@ -602,7 +609,73 @@
 		count = cvmx_helper_ports_on_interface(idx);
 
 	for (p = 0; p < 16; p++)
-		octeon_fdt_pip_port(iface, idx, p, count - 1, pmac);
+		octeon_fdt_pip_port(iface, idx, p, count - 1);
+}
+
+void __init octeon_fill_mac_addresses(void)
+{
+	const char *alias_prop;
+	char name_buffer[20];
+	u64 mac_addr_base;
+	int aliases;
+	int pip;
+	int i;
+
+	aliases = fdt_path_offset(initial_boot_params, "/aliases");
+	if (aliases < 0)
+		return;
+	/* Pack the six bootinfo MAC base bytes into one 48-bit value. */
+	mac_addr_base =
+		((octeon_bootinfo->mac_addr_base[0] & 0xffull)) << 40 |
+		((octeon_bootinfo->mac_addr_base[1] & 0xffull)) << 32 |
+		((octeon_bootinfo->mac_addr_base[2] & 0xffull)) << 24 |
+		((octeon_bootinfo->mac_addr_base[3] & 0xffull)) << 16 |
+		((octeon_bootinfo->mac_addr_base[4] & 0xffull)) << 8 |
+		 (octeon_bootinfo->mac_addr_base[5] & 0xffull);
+	/* First the nodes behind the "mix0" and "mix1" aliases. */
+	for (i = 0; i < 2; i++) {
+		int mgmt;
+
+		snprintf(name_buffer, sizeof(name_buffer), "mix%d", i);
+		alias_prop = fdt_getprop(initial_boot_params, aliases,
+					 name_buffer, NULL);
+		if (!alias_prop)
+			continue;
+		mgmt = fdt_path_offset(initial_boot_params, alias_prop);
+		if (mgmt < 0)
+			continue;
+		octeon_fdt_set_mac_addr(mgmt, &mac_addr_base);
+	}
+	/* Then every ethernet@<p> under interface@0..4 of the "pip" alias. */
+	alias_prop = fdt_getprop(initial_boot_params, aliases, "pip", NULL);
+	if (!alias_prop)
+		return;
+
+	pip = fdt_path_offset(initial_boot_params, alias_prop);
+	if (pip < 0)
+		return;
+
+	for (i = 0; i <= 4; i++) {
+		int iface;
+		int p;
+
+		snprintf(name_buffer, sizeof(name_buffer), "interface@%d", i);
+		iface = fdt_subnode_offset(initial_boot_params, pip,
+					   name_buffer);
+		if (iface < 0)
+			continue;
+		for (p = 0; p < 16; p++) {
+			int eth;
+
+			snprintf(name_buffer, sizeof(name_buffer),
+				 "ethernet@%x", p);
+			eth = fdt_subnode_offset(initial_boot_params, iface,
+						 name_buffer);
+			if (eth < 0)
+				continue;
+			octeon_fdt_set_mac_addr(eth, &mac_addr_base);
+		}
+	}
 }
 
 int __init octeon_prune_device_tree(void)
@@ -612,7 +685,6 @@
 	const char *alias_prop;
 	char name_buffer[20];
 	int aliases;
-	u64 mac_addr_base;
 
 	if (fdt_check_header(initial_boot_params))
 		panic("Corrupt Device Tree.");
@@ -623,15 +695,6 @@
 		return -EINVAL;
 	}
 
-
-	mac_addr_base =
-		((octeon_bootinfo->mac_addr_base[0] & 0xffull)) << 40 |
-		((octeon_bootinfo->mac_addr_base[1] & 0xffull)) << 32 |
-		((octeon_bootinfo->mac_addr_base[2] & 0xffull)) << 24 |
-		((octeon_bootinfo->mac_addr_base[3] & 0xffull)) << 16 |
-		((octeon_bootinfo->mac_addr_base[4] & 0xffull)) << 8 |
-		(octeon_bootinfo->mac_addr_base[5] & 0xffull);
-
 	if (OCTEON_IS_MODEL(OCTEON_CN52XX) || OCTEON_IS_MODEL(OCTEON_CN63XX))
 		max_port = 2;
 	else if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN68XX))
@@ -660,7 +723,6 @@
 			} else {
 				int phy_addr = cvmx_helper_board_get_mii_address(CVMX_HELPER_BOARD_MGMT_IPD_PORT + i);
 				octeon_fdt_set_phy(mgmt, phy_addr);
-				octeon_fdt_set_mac_addr(mgmt, &mac_addr_base);
 			}
 		}
 	}
@@ -670,7 +732,7 @@
 		int pip = fdt_path_offset(initial_boot_params, pip_path);
 		if (pip	 >= 0)
 			for (i = 0; i <= 4; i++)
-				octeon_fdt_pip_iface(pip, i, &mac_addr_base);
+				octeon_fdt_pip_iface(pip, i);
 	}
 
 	/* I2C */
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index cd7101f..64f852b 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -43,8 +43,6 @@
 #include <asm/octeon/cvmx-mio-defs.h>
 #include <asm/octeon/cvmx-rst-defs.h>
 
-extern struct plat_smp_ops octeon_smp_ops;
-
 #ifdef CONFIG_PCI
 extern void pci_console_init(const char *arg);
 #endif
@@ -466,15 +464,25 @@
 
 static char __read_mostly octeon_system_type[80];
 
-static int __init init_octeon_system_type(void)
+static void __init init_octeon_system_type(void)
 {
-	snprintf(octeon_system_type, sizeof(octeon_system_type), "%s (%s)",
-		cvmx_board_type_to_string(octeon_bootinfo->board_type),
-		octeon_model_get_string(read_c0_prid()));
+	char const *board_type;
 
-	return 0;
+	board_type = cvmx_board_type_to_string(octeon_bootinfo->board_type);
+	if (board_type == NULL) {
+		struct device_node *root;
+		int ret;
+
+		root = of_find_node_by_path("/");
+		ret = of_property_read_string(root, "model", &board_type);
+		of_node_put(root);
+		if (ret)
+			board_type = "Unsupported Board";
+	}
+
+	snprintf(octeon_system_type, sizeof(octeon_system_type), "%s (%s)",
+		 board_type, octeon_model_get_string(read_c0_prid()));
 }
-early_initcall(init_octeon_system_type);
 
 /**
  * Return a string representing the system type
@@ -492,8 +500,6 @@
 void octeon_user_io_init(void)
 {
 	union octeon_cvmemctl cvmmemctl;
-	union cvmx_iob_fau_timeout fau_timeout;
-	union cvmx_pow_nw_tim nm_tim;
 
 	/* Get the current settings for CP0_CVMMEMCTL_REG */
 	cvmmemctl.u64 = read_c0_cvmmemctl();
@@ -595,17 +601,27 @@
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
 
-	/* Set a default for the hardware timeouts */
-	fau_timeout.u64 = 0;
-	fau_timeout.s.tout_val = 0xfff;
-	/* Disable tagwait FAU timeout */
-	fau_timeout.s.tout_enb = 0;
-	cvmx_write_csr(CVMX_IOB_FAU_TIMEOUT, fau_timeout.u64);
+	if (octeon_has_feature(OCTEON_FEATURE_FAU)) {
+		union cvmx_iob_fau_timeout fau_timeout;
 
-	nm_tim.u64 = 0;
-	/* 4096 cycles */
-	nm_tim.s.nw_tim = 3;
-	cvmx_write_csr(CVMX_POW_NW_TIM, nm_tim.u64);
+		/* Set a default for the hardware timeouts */
+		fau_timeout.u64 = 0;
+		fau_timeout.s.tout_val = 0xfff;
+		/* Disable tagwait FAU timeout */
+		fau_timeout.s.tout_enb = 0;
+		cvmx_write_csr(CVMX_IOB_FAU_TIMEOUT, fau_timeout.u64);
+	}
+
+	if ((!OCTEON_IS_MODEL(OCTEON_CN68XX) &&
+	     !OCTEON_IS_MODEL(OCTEON_CN7XXX)) ||
+	    OCTEON_IS_MODEL(OCTEON_CN70XX)) {
+		union cvmx_pow_nw_tim nm_tim;
+
+		nm_tim.u64 = 0;
+		/* 4096 cycles */
+		nm_tim.s.nw_tim = 3;
+		cvmx_write_csr(CVMX_POW_NW_TIM, nm_tim.u64);
+	}
 
 	write_octeon_c0_icacheerr(0);
 	write_c0_derraddr1(0);
@@ -637,9 +653,22 @@
 	sysinfo = cvmx_sysinfo_get();
 	memset(sysinfo, 0, sizeof(*sysinfo));
 	sysinfo->system_dram_size = octeon_bootinfo->dram_size << 20;
-	sysinfo->phy_mem_desc_ptr =
-		cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr);
-	sysinfo->core_mask = octeon_bootinfo->core_mask;
+	sysinfo->phy_mem_desc_addr = (u64)phys_to_virt(octeon_bootinfo->phy_mem_desc_addr);
+
+	if ((octeon_bootinfo->major_version > 1) ||
+	    (octeon_bootinfo->major_version == 1 &&
+	     octeon_bootinfo->minor_version >= 4))
+		cvmx_coremask_copy(&sysinfo->core_mask,
+				   &octeon_bootinfo->ext_core_mask);
+	else
+		cvmx_coremask_set64(&sysinfo->core_mask,
+				    octeon_bootinfo->core_mask);
+
+	/* Some broken u-boots pass garbage in the upper bits; clear them out */
+	if (!OCTEON_IS_MODEL(OCTEON_CN78XX))
+		for (i = 512; i < 1024; i++)
+			cvmx_coremask_clear_core(&sysinfo->core_mask, i);
+
 	sysinfo->exception_base_addr = octeon_bootinfo->exception_base_addr;
 	sysinfo->cpu_clock_hz = octeon_bootinfo->eclock_hz;
 	sysinfo->dram_data_rate_hz = octeon_bootinfo->dclock_hz * 2;
@@ -867,7 +896,7 @@
 #endif
 
 	octeon_user_io_init();
-	register_smp_ops(&octeon_smp_ops);
+	octeon_setup_smp();
 }
 
 /* Exclude a single page from the regions obtained in plat_mem_setup. */
@@ -1079,6 +1108,7 @@
 	}
 }
 
+void __init octeon_fill_mac_addresses(void);
 int octeon_prune_device_tree(void);
 
 extern const char __appended_dtb;
@@ -1088,11 +1118,13 @@
 {
 	const void *fdt;
 	bool do_prune;
+	bool fill_mac;
 
 #ifdef CONFIG_MIPS_ELF_APPENDED_DTB
 	if (!fdt_check_header(&__appended_dtb)) {
 		fdt = &__appended_dtb;
 		do_prune = false;
+		fill_mac = true;
 		pr_info("Using appended Device Tree.\n");
 	} else
 #endif
@@ -1101,13 +1133,16 @@
 		if (fdt_check_header(fdt))
 			panic("Corrupt Device Tree passed to kernel.");
 		do_prune = false;
+		fill_mac = false;
 		pr_info("Using passed Device Tree.\n");
 	} else if (OCTEON_IS_MODEL(OCTEON_CN68XX)) {
 		fdt = &__dtb_octeon_68xx_begin;
 		do_prune = true;
+		fill_mac = true;
 	} else {
 		fdt = &__dtb_octeon_3xxx_begin;
 		do_prune = true;
+		fill_mac = true;
 	}
 
 	initial_boot_params = (void *)fdt;
@@ -1116,7 +1151,10 @@
 		octeon_prune_device_tree();
 		pr_info("Using internal Device Tree.\n");
 	}
+	if (fill_mac)
+		octeon_fill_mac_addresses();
 	unflatten_and_copy_device_tree();
+	init_octeon_system_type();
 }
 
 static int __initdata disable_octeon_edac_p;
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 42412ba..dff88aa 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -30,25 +30,55 @@
 EXPORT_SYMBOL(octeon_bootloader_entry_addr);
 #endif
 
+static void octeon_icache_flush(void)
+{
+	asm volatile ("synci 0($0)\n");
+}
+
+static void (*octeon_message_functions[8])(void) = {
+	scheduler_ipi,
+	generic_smp_call_function_interrupt,
+	octeon_icache_flush,
+};
+
 static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 {
-	const int coreid = cvmx_get_core_num();
-	uint64_t action;
+	u64 mbox_clrx = CVMX_CIU_MBOX_CLRX(cvmx_get_core_num());
+	u64 action;
+	int i;
 
-	/* Load the mailbox register to figure out what we're supposed to do */
-	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid)) & 0xffff;
+	/*
+	 * Make sure the function array initialization remains
+	 * correct.
+	 */
+	BUILD_BUG_ON(SMP_RESCHEDULE_YOURSELF != (1 << 0));
+	BUILD_BUG_ON(SMP_CALL_FUNCTION       != (1 << 1));
+	BUILD_BUG_ON(SMP_ICACHE_FLUSH        != (1 << 2));
+
+	/*
+	 * Load the mailbox register to figure out what we're supposed
+	 * to do.
+	 */
+	action = cvmx_read_csr(mbox_clrx);
+
+	if (OCTEON_IS_MODEL(OCTEON_CN68XX))
+		action &= 0xff;
+	else
+		action &= 0xffff;
 
 	/* Clear the mailbox to clear the interrupt */
-	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action);
+	cvmx_write_csr(mbox_clrx, action);
 
-	if (action & SMP_CALL_FUNCTION)
-		generic_smp_call_function_interrupt();
-	if (action & SMP_RESCHEDULE_YOURSELF)
-		scheduler_ipi();
+	for (i = 0; i < ARRAY_SIZE(octeon_message_functions) && action;) {
+		if (action & 1) {
+			void (*fn)(void) = octeon_message_functions[i];
 
-	/* Check if we've been told to flush the icache */
-	if (action & SMP_ICACHE_FLUSH)
-		asm volatile ("synci 0($0)\n");
+			if (fn)
+				fn();
+		}
+		action >>= 1;
+		i++;
+	}
 	return IRQ_HANDLED;
 }
 
@@ -97,13 +127,15 @@
 #endif
 }
 
-static void octeon_smp_setup(void)
+static void __init octeon_smp_setup(void)
 {
 	const int coreid = cvmx_get_core_num();
 	int cpus;
 	int id;
-	int core_mask = octeon_get_boot_coremask();
+	struct cvmx_sysinfo *sysinfo = cvmx_sysinfo_get();
+
 #ifdef CONFIG_HOTPLUG_CPU
+	int core_mask = octeon_get_boot_coremask();
 	unsigned int num_cores = cvmx_octeon_num_cores();
 #endif
 
@@ -119,7 +151,7 @@
 	/* The present CPUs get the lowest CPU numbers. */
 	cpus = 1;
 	for (id = 0; id < NR_CPUS; id++) {
-		if ((id != coreid) && (core_mask & (1 << id))) {
+		if ((id != coreid) && cvmx_coremask_is_core_set(&sysinfo->core_mask, id)) {
 			set_cpu_possible(cpus, true);
 			set_cpu_present(cpus, true);
 			__cpu_number_map[id] = cpus;
@@ -196,7 +228,7 @@
  * Callout to firmware before smp_init
  *
  */
-void octeon_prepare_cpus(unsigned int max_cpus)
+static void __init octeon_prepare_cpus(unsigned int max_cpus)
 {
 	/*
 	 * Only the low order mailbox bits are used for IPIs, leave
@@ -242,7 +274,7 @@
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	octeon_fixup_irqs();
 
-	flush_cache_all();
+	__flush_cache_all();
 	local_flush_tlb_all();
 
 	return 0;
@@ -388,3 +420,92 @@
 	.cpu_die		= octeon_cpu_die,
 #endif
 };
+
+static irqreturn_t octeon_78xx_reched_interrupt(int irq, void *dev_id)
+{
+	scheduler_ipi();
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t octeon_78xx_call_function_interrupt(int irq, void *dev_id)
+{
+	generic_smp_call_function_interrupt();
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t octeon_78xx_icache_flush_interrupt(int irq, void *dev_id)
+{
+	octeon_icache_flush();
+	return IRQ_HANDLED;
+}
+
+/*
+ * Callout to firmware before smp_init
+ */
+static void octeon_78xx_prepare_cpus(unsigned int max_cpus)
+{
+	if (request_irq(OCTEON_IRQ_MBOX0 + 0,
+			octeon_78xx_reched_interrupt,
+			IRQF_PERCPU | IRQF_NO_THREAD, "Scheduler",
+			octeon_78xx_reched_interrupt)) {
+		panic("Cannot request_irq for SchedulerIPI");
+	}
+	if (request_irq(OCTEON_IRQ_MBOX0 + 1,
+			octeon_78xx_call_function_interrupt,
+			IRQF_PERCPU | IRQF_NO_THREAD, "SMP-Call",
+			octeon_78xx_call_function_interrupt)) {
+		panic("Cannot request_irq for SMP-Call");
+	}
+	if (request_irq(OCTEON_IRQ_MBOX0 + 2,
+			octeon_78xx_icache_flush_interrupt,
+			IRQF_PERCPU | IRQF_NO_THREAD, "ICache-Flush",
+			octeon_78xx_icache_flush_interrupt)) {
+		panic("Cannot request_irq for ICache-Flush");
+	}
+}
+
+static void octeon_78xx_send_ipi_single(int cpu, unsigned int action)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		if (action & 1)
+			octeon_ciu3_mbox_send(cpu, i);
+		action >>= 1;
+	}
+}
+
+static void octeon_78xx_send_ipi_mask(const struct cpumask *mask,
+				      unsigned int action)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		octeon_78xx_send_ipi_single(cpu, action);
+}
+
+static struct plat_smp_ops octeon_78xx_smp_ops = {
+	.send_ipi_single	= octeon_78xx_send_ipi_single,
+	.send_ipi_mask		= octeon_78xx_send_ipi_mask,
+	.init_secondary		= octeon_init_secondary,
+	.smp_finish		= octeon_smp_finish,
+	.boot_secondary		= octeon_boot_secondary,
+	.smp_setup		= octeon_smp_setup,
+	.prepare_cpus		= octeon_78xx_prepare_cpus,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= octeon_cpu_disable,
+	.cpu_die		= octeon_cpu_die,
+#endif
+};
+
+void __init octeon_setup_smp(void)
+{
+	struct plat_smp_ops *ops;
+
+	if (octeon_has_feature(OCTEON_FEATURE_CIU3))
+		ops = &octeon_78xx_smp_ops;
+	else
+		ops = &octeon_smp_ops;
+
+	register_smp_ops(ops);
+}
diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig
index 0db4eb3..fad8e96 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -23,7 +23,6 @@
 CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
 CONFIG_SYN_COOKIES=y
 CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_SUBTREES=y
 CONFIG_IPV6_MROUTE=y
diff --git a/arch/mips/configs/bcm63xx_defconfig b/arch/mips/configs/bcm63xx_defconfig
index 3fec264..5599a9f 100644
--- a/arch/mips/configs/bcm63xx_defconfig
+++ b/arch/mips/configs/bcm63xx_defconfig
@@ -44,6 +44,7 @@
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 CONFIG_MTD=y
+CONFIG_MTD_BCM63XX_PARTS=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index e070dac..d20b09d 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -62,7 +62,6 @@
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 # CONFIG_INET_LRO is not set
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_IPV6_OPTIMISTIC_DAD=y
diff --git a/arch/mips/configs/bmips_be_defconfig b/arch/mips/configs/bmips_be_defconfig
index 24dcb90..acf7785 100644
--- a/arch/mips/configs/bmips_be_defconfig
+++ b/arch/mips/configs/bmips_be_defconfig
@@ -36,6 +36,7 @@
 CONFIG_PRINTK_TIME=y
 CONFIG_BRCMSTB_GISB_ARB=y
 CONFIG_MTD=y
+CONFIG_MTD_BCM63XX_PARTS=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index e57058d..dcac308 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -119,14 +119,16 @@
 CONFIG_SPI_OCTEON=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
-# CONFIG_USB_SUPPORT is not set
-CONFIG_USB_EHCI_BIG_ENDIAN_MMIO=y
-CONFIG_USB_OHCI_BIG_ENDIAN_MMIO=y
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB=m
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_EHCI_HCD_PLATFORM=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_OHCI_HCD_PLATFORM=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
 CONFIG_STAGING=y
 CONFIG_OCTEON_ETHERNET=y
+CONFIG_OCTEON_USB=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -152,6 +154,9 @@
 CONFIG_SECURITY_NETWORK=y
 CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MD5_OCTEON=y
+CONFIG_CRYPTO_SHA1_OCTEON=m
+CONFIG_CRYPTO_SHA256_OCTEON=m
+CONFIG_CRYPTO_SHA512_OCTEON=m
 CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index ebc011c..2b6cb41 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -30,7 +30,6 @@
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 CONFIG_INET_XFRM_MODE_BEET=m
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_INET6_AH=m
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 6ba9ce9..5d83ff7 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -48,7 +48,6 @@
 CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_IPV6_OPTIMISTIC_DAD=y
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 77e9f50..2b74aee 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -43,7 +43,6 @@
 CONFIG_INET_XFRM_MODE_BEET=m
 CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_IPV6_OPTIMISTIC_DAD=y
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig
index a5e85e1..3019fce 100644
--- a/arch/mips/configs/jazz_defconfig
+++ b/arch/mips/configs/jazz_defconfig
@@ -34,7 +34,6 @@
 CONFIG_INET_XFRM_MODE_TRANSPORT=m
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_INET6_AH=m
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index d1f198b..5da76e0 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -71,7 +71,6 @@
 CONFIG_TCP_CONG_BIC=y
 CONFIG_DEFAULT_BIC=y
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_TUNNEL=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
diff --git a/arch/mips/configs/ls1b_defconfig b/arch/mips/configs/loongson1b_defconfig
similarity index 84%
rename from arch/mips/configs/ls1b_defconfig
rename to arch/mips/configs/loongson1b_defconfig
index 1b2cc1f..c442f27 100644
--- a/arch/mips/configs/ls1b_defconfig
+++ b/arch/mips/configs/loongson1b_defconfig
@@ -1,19 +1,17 @@
 CONFIG_MACH_LOONGSON32=y
 CONFIG_PREEMPT=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_KERNEL_XZ=y
 CONFIG_SYSVIPC=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=16
 CONFIG_NAMESPACES=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_RD_BZIP2=y
-CONFIG_RD_LZMA=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_EXPERT=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
@@ -41,6 +39,12 @@
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_STANDALONE is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_LOONGSON1=y
+CONFIG_MTD_UBI=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_SCSI=m
 # CONFIG_SCSI_PROC_FS is not set
@@ -48,7 +52,6 @@
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
 # CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
@@ -56,7 +59,6 @@
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 CONFIG_STMMAC_ETH=y
-CONFIG_STMMAC_DA=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
@@ -69,18 +71,25 @@
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_LOONGSON1=y
 # CONFIG_HWMON is not set
 # CONFIG_VGA_CONSOLE is not set
-CONFIG_USB_HID=m
 CONFIG_HID_GENERIC=m
+CONFIG_USB_HID=m
 CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_HCD_PLATFORM=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=m
 CONFIG_USB_SERIAL=m
 CONFIG_USB_SERIAL_PL2303=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_LOONGSON1=y
 # CONFIG_IOMMU_SUPPORT is not set
@@ -96,15 +105,21 @@
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
-# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_UBIFS_FS=y
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
+CONFIG_UBIFS_ATIME_SUPPORT=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_ISO8859_1=m
+CONFIG_DYNAMIC_DEBUG=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_DEBUG_PREEMPT is not set
 # CONFIG_FTRACE is not set
 # CONFIG_EARLY_PRINTK is not set
+# CONFIG_CRYPTO_ECHAINIV is not set
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 9b6926d..f3f6005 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -51,7 +51,6 @@
 CONFIG_INET_XFRM_MODE_TRANSPORT=m
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 CONFIG_INET_XFRM_MODE_BEET=m
-CONFIG_IPV6_PRIVACY=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig
index b3d1d37..b496c25 100644
--- a/arch/mips/configs/nlm_xlp_defconfig
+++ b/arch/mips/configs/nlm_xlp_defconfig
@@ -95,7 +95,6 @@
 CONFIG_TCP_CONG_ILLINOIS=m
 CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
index 3d8016d..8e99ad8 100644
--- a/arch/mips/configs/nlm_xlr_defconfig
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -75,7 +75,6 @@
 CONFIG_TCP_CONG_ILLINOIS=m
 CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 82db4e3..c2b4e3f 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -37,7 +37,6 @@
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 CONFIG_INET_XFRM_MODE_BEET=m
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_PRIVACY=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_INET6_AH=m
diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index a0b8943..1c3bf9f 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c
@@ -60,6 +60,7 @@
 int dec_tc_bus;
 
 DEFINE_SPINLOCK(ioasic_ssr_lock);
+EXPORT_SYMBOL(ioasic_ssr_lock);
 
 volatile u32 *ioasic_base;
 
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index c7fe4d0..9740066 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -1,5 +1,6 @@
 # MIPS headers
 generic-(CONFIG_GENERIC_CSUM) += checksum.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += current.h
 generic-y += dma-contiguous.h
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 867f924..6741673 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -235,6 +235,7 @@
 	.macro	ld_b	wd, off, base
 	.set	push
 	.set	mips32r2
+	.set	fp=64
 	.set	msa
 	ld.b	$w\wd, \off(\base)
 	.set	pop
@@ -243,6 +244,7 @@
 	.macro	ld_h	wd, off, base
 	.set	push
 	.set	mips32r2
+	.set	fp=64
 	.set	msa
 	ld.h	$w\wd, \off(\base)
 	.set	pop
@@ -251,6 +253,7 @@
 	.macro	ld_w	wd, off, base
 	.set	push
 	.set	mips32r2
+	.set	fp=64
 	.set	msa
 	ld.w	$w\wd, \off(\base)
 	.set	pop
@@ -268,6 +271,7 @@
 	.macro	st_b	wd, off, base
 	.set	push
 	.set	mips32r2
+	.set	fp=64
 	.set	msa
 	st.b	$w\wd, \off(\base)
 	.set	pop
@@ -276,6 +280,7 @@
 	.macro	st_h	wd, off, base
 	.set	push
 	.set	mips32r2
+	.set	fp=64
 	.set	msa
 	st.h	$w\wd, \off(\base)
 	.set	pop
@@ -284,6 +289,7 @@
 	.macro	st_w	wd, off, base
 	.set	push
 	.set	mips32r2
+	.set	fp=64
 	.set	msa
 	st.w	$w\wd, \off(\base)
 	.set	pop
@@ -298,21 +304,21 @@
 	.set	pop
 	.endm
 
-	.macro	copy_u_w	ws, n
+	.macro	copy_s_w	ws, n
 	.set	push
 	.set	mips32r2
 	.set	fp=64
 	.set	msa
-	copy_u.w $1, $w\ws[\n]
+	copy_s.w $1, $w\ws[\n]
 	.set	pop
 	.endm
 
-	.macro	copy_u_d	ws, n
+	.macro	copy_s_d	ws, n
 	.set	push
 	.set	mips64r2
 	.set	fp=64
 	.set	msa
-	copy_u.d $1, $w\ws[\n]
+	copy_s.d $1, $w\ws[\n]
 	.set	pop
 	.endm
 
@@ -346,8 +352,8 @@
 #define STH_MSA_INSN		0x5800081f
 #define STW_MSA_INSN		0x5800082f
 #define STD_MSA_INSN		0x5800083f
-#define COPY_UW_MSA_INSN	0x58f00056
-#define COPY_UD_MSA_INSN	0x58f80056
+#define COPY_SW_MSA_INSN	0x58b00056
+#define COPY_SD_MSA_INSN	0x58b80056
 #define INSERT_W_MSA_INSN	0x59300816
 #define INSERT_D_MSA_INSN	0x59380816
 #else
@@ -361,8 +367,8 @@
 #define STH_MSA_INSN		0x78000825
 #define STW_MSA_INSN		0x78000826
 #define STD_MSA_INSN		0x78000827
-#define COPY_UW_MSA_INSN	0x78f00059
-#define COPY_UD_MSA_INSN	0x78f80059
+#define COPY_SW_MSA_INSN	0x78b00059
+#define COPY_SD_MSA_INSN	0x78b80059
 #define INSERT_W_MSA_INSN	0x79300819
 #define INSERT_D_MSA_INSN	0x79380819
 #endif
@@ -393,7 +399,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	LDB_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -402,7 +408,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	LDH_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -411,7 +417,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	LDW_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -420,7 +426,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	LDD_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -429,7 +435,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	STB_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -438,7 +444,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	STH_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -447,7 +453,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	STW_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -456,26 +462,26 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	addu	$1, \base, \off
+	PTR_ADDU $1, \base, \off
 	.word	STD_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
 
-	.macro	copy_u_w	ws, n
+	.macro	copy_s_w	ws, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
 	.insn
-	.word	COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
+	.word	COPY_SW_MSA_INSN | (\n << 16) | (\ws << 11)
 	.set	pop
 	.endm
 
-	.macro	copy_u_d	ws, n
+	.macro	copy_s_d	ws, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
 	.insn
-	.word	COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
+	.word	COPY_SD_MSA_INSN | (\n << 16) | (\ws << 11)
 	.set	pop
 	.endm
 
@@ -496,41 +502,52 @@
 	.endm
 #endif
 
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+#define FPR_BASE_OFFS	THREAD_FPR0
+#define FPR_BASE	$1
+#else
+#define FPR_BASE_OFFS	0
+#define FPR_BASE	\thread
+#endif
+
 	.macro	msa_save_all	thread
-	st_d	0, THREAD_FPR0, \thread
-	st_d	1, THREAD_FPR1, \thread
-	st_d	2, THREAD_FPR2, \thread
-	st_d	3, THREAD_FPR3, \thread
-	st_d	4, THREAD_FPR4, \thread
-	st_d	5, THREAD_FPR5, \thread
-	st_d	6, THREAD_FPR6, \thread
-	st_d	7, THREAD_FPR7, \thread
-	st_d	8, THREAD_FPR8, \thread
-	st_d	9, THREAD_FPR9, \thread
-	st_d	10, THREAD_FPR10, \thread
-	st_d	11, THREAD_FPR11, \thread
-	st_d	12, THREAD_FPR12, \thread
-	st_d	13, THREAD_FPR13, \thread
-	st_d	14, THREAD_FPR14, \thread
-	st_d	15, THREAD_FPR15, \thread
-	st_d	16, THREAD_FPR16, \thread
-	st_d	17, THREAD_FPR17, \thread
-	st_d	18, THREAD_FPR18, \thread
-	st_d	19, THREAD_FPR19, \thread
-	st_d	20, THREAD_FPR20, \thread
-	st_d	21, THREAD_FPR21, \thread
-	st_d	22, THREAD_FPR22, \thread
-	st_d	23, THREAD_FPR23, \thread
-	st_d	24, THREAD_FPR24, \thread
-	st_d	25, THREAD_FPR25, \thread
-	st_d	26, THREAD_FPR26, \thread
-	st_d	27, THREAD_FPR27, \thread
-	st_d	28, THREAD_FPR28, \thread
-	st_d	29, THREAD_FPR29, \thread
-	st_d	30, THREAD_FPR30, \thread
-	st_d	31, THREAD_FPR31, \thread
 	.set	push
 	.set	noat
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+	PTR_ADDU FPR_BASE, \thread, FPR_BASE_OFFS
+#endif
+	st_d	 0, THREAD_FPR0  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 1, THREAD_FPR1  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 2, THREAD_FPR2  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 3, THREAD_FPR3  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 4, THREAD_FPR4  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 5, THREAD_FPR5  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 6, THREAD_FPR6  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 7, THREAD_FPR7  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 8, THREAD_FPR8  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 9, THREAD_FPR9  - FPR_BASE_OFFS, FPR_BASE
+	st_d	10, THREAD_FPR10 - FPR_BASE_OFFS, FPR_BASE
+	st_d	11, THREAD_FPR11 - FPR_BASE_OFFS, FPR_BASE
+	st_d	12, THREAD_FPR12 - FPR_BASE_OFFS, FPR_BASE
+	st_d	13, THREAD_FPR13 - FPR_BASE_OFFS, FPR_BASE
+	st_d	14, THREAD_FPR14 - FPR_BASE_OFFS, FPR_BASE
+	st_d	15, THREAD_FPR15 - FPR_BASE_OFFS, FPR_BASE
+	st_d	16, THREAD_FPR16 - FPR_BASE_OFFS, FPR_BASE
+	st_d	17, THREAD_FPR17 - FPR_BASE_OFFS, FPR_BASE
+	st_d	18, THREAD_FPR18 - FPR_BASE_OFFS, FPR_BASE
+	st_d	19, THREAD_FPR19 - FPR_BASE_OFFS, FPR_BASE
+	st_d	20, THREAD_FPR20 - FPR_BASE_OFFS, FPR_BASE
+	st_d	21, THREAD_FPR21 - FPR_BASE_OFFS, FPR_BASE
+	st_d	22, THREAD_FPR22 - FPR_BASE_OFFS, FPR_BASE
+	st_d	23, THREAD_FPR23 - FPR_BASE_OFFS, FPR_BASE
+	st_d	24, THREAD_FPR24 - FPR_BASE_OFFS, FPR_BASE
+	st_d	25, THREAD_FPR25 - FPR_BASE_OFFS, FPR_BASE
+	st_d	26, THREAD_FPR26 - FPR_BASE_OFFS, FPR_BASE
+	st_d	27, THREAD_FPR27 - FPR_BASE_OFFS, FPR_BASE
+	st_d	28, THREAD_FPR28 - FPR_BASE_OFFS, FPR_BASE
+	st_d	29, THREAD_FPR29 - FPR_BASE_OFFS, FPR_BASE
+	st_d	30, THREAD_FPR30 - FPR_BASE_OFFS, FPR_BASE
+	st_d	31, THREAD_FPR31 - FPR_BASE_OFFS, FPR_BASE
 	SET_HARDFLOAT
 	_cfcmsa	$1, MSA_CSR
 	sw	$1, THREAD_MSA_CSR(\thread)
@@ -543,41 +560,47 @@
 	SET_HARDFLOAT
 	lw	$1, THREAD_MSA_CSR(\thread)
 	_ctcmsa	MSA_CSR, $1
-	.set	pop
-	ld_d	0, THREAD_FPR0, \thread
-	ld_d	1, THREAD_FPR1, \thread
-	ld_d	2, THREAD_FPR2, \thread
-	ld_d	3, THREAD_FPR3, \thread
-	ld_d	4, THREAD_FPR4, \thread
-	ld_d	5, THREAD_FPR5, \thread
-	ld_d	6, THREAD_FPR6, \thread
-	ld_d	7, THREAD_FPR7, \thread
-	ld_d	8, THREAD_FPR8, \thread
-	ld_d	9, THREAD_FPR9, \thread
-	ld_d	10, THREAD_FPR10, \thread
-	ld_d	11, THREAD_FPR11, \thread
-	ld_d	12, THREAD_FPR12, \thread
-	ld_d	13, THREAD_FPR13, \thread
-	ld_d	14, THREAD_FPR14, \thread
-	ld_d	15, THREAD_FPR15, \thread
-	ld_d	16, THREAD_FPR16, \thread
-	ld_d	17, THREAD_FPR17, \thread
-	ld_d	18, THREAD_FPR18, \thread
-	ld_d	19, THREAD_FPR19, \thread
-	ld_d	20, THREAD_FPR20, \thread
-	ld_d	21, THREAD_FPR21, \thread
-	ld_d	22, THREAD_FPR22, \thread
-	ld_d	23, THREAD_FPR23, \thread
-	ld_d	24, THREAD_FPR24, \thread
-	ld_d	25, THREAD_FPR25, \thread
-	ld_d	26, THREAD_FPR26, \thread
-	ld_d	27, THREAD_FPR27, \thread
-	ld_d	28, THREAD_FPR28, \thread
-	ld_d	29, THREAD_FPR29, \thread
-	ld_d	30, THREAD_FPR30, \thread
-	ld_d	31, THREAD_FPR31, \thread
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+	PTR_ADDU FPR_BASE, \thread, FPR_BASE_OFFS
+#endif
+	ld_d	 0, THREAD_FPR0  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 1, THREAD_FPR1  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 2, THREAD_FPR2  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 3, THREAD_FPR3  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 4, THREAD_FPR4  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 5, THREAD_FPR5  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 6, THREAD_FPR6  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 7, THREAD_FPR7  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 8, THREAD_FPR8  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 9, THREAD_FPR9  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	10, THREAD_FPR10 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	11, THREAD_FPR11 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	12, THREAD_FPR12 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	13, THREAD_FPR13 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	14, THREAD_FPR14 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	15, THREAD_FPR15 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	16, THREAD_FPR16 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	17, THREAD_FPR17 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	18, THREAD_FPR18 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	19, THREAD_FPR19 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	20, THREAD_FPR20 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	21, THREAD_FPR21 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	22, THREAD_FPR22 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	23, THREAD_FPR23 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	24, THREAD_FPR24 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	25, THREAD_FPR25 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	26, THREAD_FPR26 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	27, THREAD_FPR27 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	28, THREAD_FPR28 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	29, THREAD_FPR29 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	30, THREAD_FPR30 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	31, THREAD_FPR31 - FPR_BASE_OFFS, FPR_BASE
+	.set pop
 	.endm
 
+#undef FPR_BASE_OFFS
+#undef FPR_BASE
+
 	.macro	msa_init_upper wd
 #ifdef CONFIG_64BIT
 	insert_d \wd, 1
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index ce9666c..fa57cef 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -19,25 +19,10 @@
 #include <asm/byteorder.h>		/* sigh ... */
 #include <asm/compiler.h>
 #include <asm/cpu-features.h>
+#include <asm/llsc.h>
 #include <asm/sgidefs.h>
 #include <asm/war.h>
 
-#if _MIPS_SZLONG == 32
-#define SZLONG_LOG 5
-#define SZLONG_MASK 31UL
-#define __LL		"ll	"
-#define __SC		"sc	"
-#define __INS		"ins	"
-#define __EXT		"ext	"
-#elif _MIPS_SZLONG == 64
-#define SZLONG_LOG 6
-#define SZLONG_MASK 63UL
-#define __LL		"lld	"
-#define __SC		"scd	"
-#define __INS		"dins	 "
-#define __EXT		"dext	 "
-#endif
-
 /*
  * These are the "slower" versions of the functions and are in bitops.c.
  * These functions call raw_local_irq_{save,restore}().
diff --git a/arch/mips/include/asm/bitrev.h b/arch/mips/include/asm/bitrev.h
new file mode 100644
index 0000000..bc739a4
--- /dev/null
+++ b/arch/mips/include/asm/bitrev.h
@@ -0,0 +1,30 @@
+#ifndef __MIPS_ASM_BITREV_H__
+#define __MIPS_ASM_BITREV_H__
+
+#include <linux/swab.h>
+
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+	u32 ret;
+
+	asm("bitswap	%0, %1" : "=r"(ret) : "r"(__swab32(x)));
+	return ret;
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+	u16 ret;
+
+	asm("bitswap	%0, %1" : "=r"(ret) : "r"(__swab16(x)));
+	return ret;
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+	u8 ret;
+
+	asm("bitswap	%0, %1" : "=r"(ret) : "r"(x));
+	return ret;
+}
+
+#endif /* __MIPS_ASM_BITREV_H__ */
diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h
index 6d25ad3..a92aee7 100644
--- a/arch/mips/include/asm/bmips.h
+++ b/arch/mips/include/asm/bmips.h
@@ -88,6 +88,7 @@
 
 extern void bmips_ebase_setup(void);
 extern asmlinkage void plat_wired_tlb_setup(void);
+extern void bmips_cpu_setup(void);
 
 static inline unsigned long bmips_read_zscm_reg(unsigned int offset)
 {
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index b603804..9f67033 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -144,4 +144,22 @@
 
 #endif /* CONFIG_SWIOTLB */
 
+#ifdef CONFIG_USE_OF
+/**
+ * plat_get_fdt() - Return a pointer to the platform's device tree blob
+ *
+ * This function provides a platform independent API to get a pointer to the
+ * flattened device tree blob. The interface between bootloader and kernel
+ * is not consistent across platforms so it is necessary to provide this
+ * API such that common startup code can locate the FDT.
+ *
+ * This is used by the KASLR code to get command line arguments and random
+ * seed from the device tree. Any platform wishing to use KASLR should
+ * provide this API and select SYS_SUPPORTS_RELOCATABLE.
+ *
+ * Return: Pointer to the flattened device tree blob.
+ */
+extern void *plat_get_fdt(void);
+#endif /* CONFIG_USE_OF */
+
 #endif /* _ASM_BOOTINFO_H */
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index 723229f..34ed22e 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -51,7 +51,6 @@
 	unsigned long start, unsigned long end);
 extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
 extern void __flush_dcache_page(struct page *page);
-extern void __flush_icache_page(struct vm_area_struct *vma, struct page *page);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 static inline void flush_dcache_page(struct page *page)
@@ -77,11 +76,6 @@
 static inline void flush_icache_page(struct vm_area_struct *vma,
 	struct page *page)
 {
-	if (!cpu_has_ic_fills_f_dc && (vma->vm_flags & VM_EXEC) &&
-	    Page_dcache_dirty(page)) {
-		__flush_icache_page(vma, page);
-		ClearPageDcacheDirty(page);
-	}
 }
 
 extern void (*flush_icache_range)(unsigned long start, unsigned long end);
@@ -132,6 +126,7 @@
 static inline void flush_kernel_dcache_page(struct page *page)
 {
 	BUG_ON(cpu_has_dc_aliases && PageHighMem(page));
+	flush_dcache_page(page);
 }
 
 /*
diff --git a/arch/mips/include/asm/cacheops.h b/arch/mips/include/asm/cacheops.h
index c3212ff..8031fbc 100644
--- a/arch/mips/include/asm/cacheops.h
+++ b/arch/mips/include/asm/cacheops.h
@@ -21,6 +21,7 @@
 #define Cache_I				0x00
 #define Cache_D				0x01
 #define Cache_T				0x02
+#define Cache_V				0x02 /* Loongson-3 */
 #define Cache_S				0x03
 
 #define Index_Writeback_Inv		0x00
@@ -107,4 +108,9 @@
  */
 #define Hit_Invalidate_I_Loongson2	(Cache_I | 0x00)
 
+/*
+ * Loongson3-specific cacheops
+ */
+#define Index_Writeback_Inv_V		(Cache_V | Index_Writeback_Inv)
+
 #endif	/* __ASM_CACHEOPS_H */
diff --git a/arch/mips/include/asm/clkdev.h b/arch/mips/include/asm/clkdev.h
deleted file mode 100644
index 1b3ad7b..0000000
--- a/arch/mips/include/asm/clkdev.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- *  based on arch/arm/include/asm/clkdev.h
- *
- *  Copyright (C) 2008 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Helper for the clk API to assist looking up a struct clk.
- */
-#ifndef __ASM_CLKDEV_H
-#define __ASM_CLKDEV_H
-
-#include <linux/slab.h>
-
-#ifndef CONFIG_COMMON_CLK
-#define __clk_get(clk)	({ 1; })
-#define __clk_put(clk)	do { } while (0)
-#endif
-
-static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size)
-{
-	return kzalloc(size, GFP_KERNEL);
-}
-
-#endif
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index eeec8c8..e6f19fc 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -35,6 +35,9 @@
 #ifndef cpu_has_htw
 #define cpu_has_htw		(cpu_data[0].options & MIPS_CPU_HTW)
 #endif
+#ifndef cpu_has_ldpte
+#define cpu_has_ldpte		(cpu_data[0].options & MIPS_CPU_LDPTE)
+#endif
 #ifndef cpu_has_rixiex
 #define cpu_has_rixiex		(cpu_data[0].options & MIPS_CPU_RIXIEX)
 #endif
@@ -117,6 +120,21 @@
 #ifndef kernel_uses_llsc
 #define kernel_uses_llsc	cpu_has_llsc
 #endif
+#ifndef cpu_has_guestctl0ext
+#define cpu_has_guestctl0ext	(cpu_data[0].options & MIPS_CPU_GUESTCTL0EXT)
+#endif
+#ifndef cpu_has_guestctl1
+#define cpu_has_guestctl1	(cpu_data[0].options & MIPS_CPU_GUESTCTL1)
+#endif
+#ifndef cpu_has_guestctl2
+#define cpu_has_guestctl2	(cpu_data[0].options & MIPS_CPU_GUESTCTL2)
+#endif
+#ifndef cpu_has_guestid
+#define cpu_has_guestid		(cpu_data[0].options & MIPS_CPU_GUESTID)
+#endif
+#ifndef cpu_has_drg
+#define cpu_has_drg		(cpu_data[0].options & MIPS_CPU_DRG)
+#endif
 #ifndef cpu_has_mips16
 #define cpu_has_mips16		(cpu_data[0].ases & MIPS_ASE_MIPS16)
 #endif
@@ -142,8 +160,14 @@
 # endif
 #endif
 
+#ifndef cpu_has_lpa
+#define cpu_has_lpa		(cpu_data[0].options & MIPS_CPU_LPA)
+#endif
+#ifndef cpu_has_mvh
+#define cpu_has_mvh		(cpu_data[0].options & MIPS_CPU_MVH)
+#endif
 #ifndef cpu_has_xpa
-#define cpu_has_xpa		(cpu_data[0].options & MIPS_CPU_XPA)
+#define cpu_has_xpa		(cpu_has_lpa && cpu_has_mvh)
 #endif
 #ifndef cpu_has_vtag_icache
 #define cpu_has_vtag_icache	(cpu_data[0].icache.flags & MIPS_CACHE_VTAG)
@@ -307,10 +331,18 @@
 #define cpu_has_dsp2		(cpu_data[0].ases & MIPS_ASE_DSP2P)
 #endif
 
+#ifndef cpu_has_dsp3
+#define cpu_has_dsp3		(cpu_data[0].ases & MIPS_ASE_DSP3)
+#endif
+
 #ifndef cpu_has_mipsmt
 #define cpu_has_mipsmt		(cpu_data[0].ases & MIPS_ASE_MIPSMT)
 #endif
 
+#ifndef cpu_has_vp
+#define cpu_has_vp		(cpu_data[0].options & MIPS_CPU_VP)
+#endif
+
 #ifndef cpu_has_userlocal
 #define cpu_has_userlocal	(cpu_data[0].options & MIPS_CPU_ULRI)
 #endif
@@ -421,4 +453,107 @@
 #define cpu_has_nan_2008	(cpu_data[0].options & MIPS_CPU_NAN_2008)
 #endif
 
+#ifndef cpu_has_ebase_wg
+# define cpu_has_ebase_wg	(cpu_data[0].options & MIPS_CPU_EBASE_WG)
+#endif
+
+#ifndef cpu_has_badinstr
+# define cpu_has_badinstr	(cpu_data[0].options & MIPS_CPU_BADINSTR)
+#endif
+
+#ifndef cpu_has_badinstrp
+# define cpu_has_badinstrp	(cpu_data[0].options & MIPS_CPU_BADINSTRP)
+#endif
+
+#ifndef cpu_has_contextconfig
+# define cpu_has_contextconfig	(cpu_data[0].options & MIPS_CPU_CTXTC)
+#endif
+
+#ifndef cpu_has_perf
+# define cpu_has_perf		(cpu_data[0].options & MIPS_CPU_PERF)
+#endif
+
+/*
+ * Guest capabilities
+ */
+#ifndef cpu_guest_has_conf1
+#define cpu_guest_has_conf1	(cpu_data[0].guest.conf & (1 << 1))
+#endif
+#ifndef cpu_guest_has_conf2
+#define cpu_guest_has_conf2	(cpu_data[0].guest.conf & (1 << 2))
+#endif
+#ifndef cpu_guest_has_conf3
+#define cpu_guest_has_conf3	(cpu_data[0].guest.conf & (1 << 3))
+#endif
+#ifndef cpu_guest_has_conf4
+#define cpu_guest_has_conf4	(cpu_data[0].guest.conf & (1 << 4))
+#endif
+#ifndef cpu_guest_has_conf5
+#define cpu_guest_has_conf5	(cpu_data[0].guest.conf & (1 << 5))
+#endif
+#ifndef cpu_guest_has_conf6
+#define cpu_guest_has_conf6	(cpu_data[0].guest.conf & (1 << 6))
+#endif
+#ifndef cpu_guest_has_conf7
+#define cpu_guest_has_conf7	(cpu_data[0].guest.conf & (1 << 7))
+#endif
+#ifndef cpu_guest_has_fpu
+#define cpu_guest_has_fpu	(cpu_data[0].guest.options & MIPS_CPU_FPU)
+#endif
+#ifndef cpu_guest_has_watch
+#define cpu_guest_has_watch	(cpu_data[0].guest.options & MIPS_CPU_WATCH)
+#endif
+#ifndef cpu_guest_has_contextconfig
+#define cpu_guest_has_contextconfig (cpu_data[0].guest.options & MIPS_CPU_CTXTC)
+#endif
+#ifndef cpu_guest_has_segments
+#define cpu_guest_has_segments	(cpu_data[0].guest.options & MIPS_CPU_SEGMENTS)
+#endif
+#ifndef cpu_guest_has_badinstr
+#define cpu_guest_has_badinstr	(cpu_data[0].guest.options & MIPS_CPU_BADINSTR)
+#endif
+#ifndef cpu_guest_has_badinstrp
+#define cpu_guest_has_badinstrp	(cpu_data[0].guest.options & MIPS_CPU_BADINSTRP)
+#endif
+#ifndef cpu_guest_has_htw
+#define cpu_guest_has_htw	(cpu_data[0].guest.options & MIPS_CPU_HTW)
+#endif
+#ifndef cpu_guest_has_msa
+#define cpu_guest_has_msa	(cpu_data[0].guest.ases & MIPS_ASE_MSA)
+#endif
+#ifndef cpu_guest_has_kscr
+#define cpu_guest_has_kscr(n)	(cpu_data[0].guest.kscratch_mask & (1u << (n)))
+#endif
+#ifndef cpu_guest_has_rw_llb
+#define cpu_guest_has_rw_llb	(cpu_has_mips_r6 || (cpu_data[0].guest.options & MIPS_CPU_RW_LLB))
+#endif
+#ifndef cpu_guest_has_perf
+#define cpu_guest_has_perf	(cpu_data[0].guest.options & MIPS_CPU_PERF)
+#endif
+#ifndef cpu_guest_has_maar
+#define cpu_guest_has_maar	(cpu_data[0].guest.options & MIPS_CPU_MAAR)
+#endif
+
+/*
+ * Guest dynamic capabilities
+ */
+#ifndef cpu_guest_has_dyn_fpu
+#define cpu_guest_has_dyn_fpu	(cpu_data[0].guest.options_dyn & MIPS_CPU_FPU)
+#endif
+#ifndef cpu_guest_has_dyn_watch
+#define cpu_guest_has_dyn_watch	(cpu_data[0].guest.options_dyn & MIPS_CPU_WATCH)
+#endif
+#ifndef cpu_guest_has_dyn_contextconfig
+#define cpu_guest_has_dyn_contextconfig (cpu_data[0].guest.options_dyn & MIPS_CPU_CTXTC)
+#endif
+#ifndef cpu_guest_has_dyn_perf
+#define cpu_guest_has_dyn_perf	(cpu_data[0].guest.options_dyn & MIPS_CPU_PERF)
+#endif
+#ifndef cpu_guest_has_dyn_msa
+#define cpu_guest_has_dyn_msa	(cpu_data[0].guest.ases_dyn & MIPS_ASE_MSA)
+#endif
+#ifndef cpu_guest_has_dyn_maar
+#define cpu_guest_has_dyn_maar	(cpu_data[0].guest.options_dyn & MIPS_CPU_MAAR)
+#endif
+
 #endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index af12c1f..edbe273 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -28,6 +28,15 @@
 	unsigned char flags;	/* Flags describing cache properties */
 };
 
+struct guest_info {
+	unsigned long		ases;
+	unsigned long		ases_dyn;
+	unsigned long long	options;
+	unsigned long long	options_dyn;
+	u8			conf;
+	u8			kscratch_mask;
+};
+
 /*
  * Flag definitions
  */
@@ -40,6 +49,9 @@
 
 struct cpuinfo_mips {
 	unsigned long		asid_cache;
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+	unsigned long		asid_mask;
+#endif
 
 	/*
 	 * Capability and feature descriptor structure for MIPS CPU
@@ -60,6 +72,7 @@
 	int			tlbsizeftlbways;
 	struct cache_desc	icache; /* Primary I-cache */
 	struct cache_desc	dcache; /* Primary D or combined I/D cache */
+	struct cache_desc	vcache; /* Victim cache, between pcache and scache */
 	struct cache_desc	scache; /* Secondary cache */
 	struct cache_desc	tcache; /* Tertiary/split secondary cache */
 	int			srsets; /* Shadow register sets */
@@ -68,7 +81,7 @@
 #ifdef CONFIG_64BIT
 	int			vmbits; /* Virtual memory size in bits */
 #endif
-#ifdef CONFIG_MIPS_MT_SMP
+#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_CPU_MIPSR6)
 	/*
 	 * There is not necessarily a 1:1 mapping of VPE num to CPU number
 	 * in particular on multi-core systems.
@@ -91,6 +104,11 @@
 	 * htw_start/htw_stop calls
 	 */
 	unsigned int		htw_seq;
+
+	/* VZ & Guest features */
+	struct guest_info	guest;
+	unsigned int		gtoffset_mask;
+	unsigned int		guestid_mask;
 } __attribute__((aligned(SMP_CACHE_BYTES)));
 
 extern struct cpuinfo_mips cpu_data[];
@@ -125,10 +143,31 @@
 	unsigned long n;
 };
 
-#ifdef CONFIG_MIPS_MT_SMP
+#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_CPU_MIPSR6)
 # define cpu_vpe_id(cpuinfo)	((cpuinfo)->vpe_id)
 #else
 # define cpu_vpe_id(cpuinfo)	({ (void)cpuinfo; 0; })
 #endif
 
+static inline unsigned long cpu_asid_inc(void)
+{
+	return 1 << CONFIG_MIPS_ASID_SHIFT;
+}
+
+static inline unsigned long cpu_asid_mask(struct cpuinfo_mips *cpuinfo)
+{
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+	return cpuinfo->asid_mask;
+#endif
+	return ((1 << CONFIG_MIPS_ASID_BITS) - 1) << CONFIG_MIPS_ASID_SHIFT;
+}
+
+static inline void set_cpu_asid_mask(struct cpuinfo_mips *cpuinfo,
+				     unsigned long asid_mask)
+{
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+	cpuinfo->asid_mask = asid_mask;
+#endif
+}
+
 #endif /* __ASM_CPU_INFO_H */
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index abee2bf..fbe1881 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -77,8 +77,13 @@
 	 */
 #endif
 
+#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R6
+	case CPU_M6250:
+#endif
+
 #ifdef CONFIG_SYS_HAS_CPU_MIPS64_R6
 	case CPU_I6400:
+	case CPU_P6600:
 #endif
 
 #ifdef CONFIG_SYS_HAS_CPU_R3000
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index a97ca97..f672df8 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -42,6 +42,7 @@
 #define PRID_COMP_LEXRA		0x0b0000
 #define PRID_COMP_NETLOGIC	0x0c0000
 #define PRID_COMP_CAVIUM	0x0d0000
+#define PRID_COMP_LOONGSON	0x140000
 #define PRID_COMP_INGENIC_D0	0xd00000	/* JZ4740, JZ4750 */
 #define PRID_COMP_INGENIC_D1	0xd10000	/* JZ4770, JZ4775 */
 #define PRID_COMP_INGENIC_E1	0xe10000	/* JZ4780 */
@@ -118,9 +119,11 @@
 #define PRID_IMP_INTERAPTIV_MP	0xa100
 #define PRID_IMP_PROAPTIV_UP	0xa200
 #define PRID_IMP_PROAPTIV_MP	0xa300
+#define PRID_IMP_P6600		0xa400
 #define PRID_IMP_M5150		0xa700
 #define PRID_IMP_P5600		0xa800
 #define PRID_IMP_I6400		0xa900
+#define PRID_IMP_M6250		0xab00
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE
@@ -169,6 +172,8 @@
 #define PRID_IMP_CAVIUM_CNF71XX 0x9400
 #define PRID_IMP_CAVIUM_CN78XX 0x9500
 #define PRID_IMP_CAVIUM_CN70XX 0x9600
+#define PRID_IMP_CAVIUM_CN73XX 0x9700
+#define PRID_IMP_CAVIUM_CNF75XX 0x9800
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_INGENIC_*
@@ -237,9 +242,10 @@
 #define PRID_REV_LOONGSON1B	0x0020
 #define PRID_REV_LOONGSON2E	0x0002
 #define PRID_REV_LOONGSON2F	0x0003
-#define PRID_REV_LOONGSON3A	0x0005
+#define PRID_REV_LOONGSON3A_R1	0x0005
 #define PRID_REV_LOONGSON3B_R1	0x0006
 #define PRID_REV_LOONGSON3B_R2	0x0007
+#define PRID_REV_LOONGSON3A_R2	0x0008
 
 /*
  * Older processors used to encode processor version and revision in two
@@ -307,8 +313,8 @@
 	CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
 	CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
 	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_LOONGSON1, CPU_M14KC,
-	CPU_M14KEC, CPU_INTERAPTIV, CPU_P5600, CPU_PROAPTIV, CPU_1074K, CPU_M5150,
-	CPU_I6400,
+	CPU_M14KEC, CPU_INTERAPTIV, CPU_P5600, CPU_PROAPTIV, CPU_1074K,
+	CPU_M5150, CPU_I6400, CPU_P6600, CPU_M6250,
 
 	/*
 	 * MIPS64 class processors
@@ -346,48 +352,68 @@
 	MIPS_CPU_ISA_M64R6)
 
 /*
+ * Private version of BIT_ULL() to escape include file recursion hell.
+ * We soon will have to switch to another mechanism that will work with
+ * more than 64 bits anyway.
+ */
+#define MBIT_ULL(bit)		(1ULL << (bit))
+
+/*
  * CPU Option encodings
  */
-#define MIPS_CPU_TLB		0x00000001ull /* CPU has TLB */
-#define MIPS_CPU_4KEX		0x00000002ull /* "R4K" exception model */
-#define MIPS_CPU_3K_CACHE	0x00000004ull /* R3000-style caches */
-#define MIPS_CPU_4K_CACHE	0x00000008ull /* R4000-style caches */
-#define MIPS_CPU_TX39_CACHE	0x00000010ull /* TX3900-style caches */
-#define MIPS_CPU_FPU		0x00000020ull /* CPU has FPU */
-#define MIPS_CPU_32FPR		0x00000040ull /* 32 dbl. prec. FP registers */
-#define MIPS_CPU_COUNTER	0x00000080ull /* Cycle count/compare */
-#define MIPS_CPU_WATCH		0x00000100ull /* watchpoint registers */
-#define MIPS_CPU_DIVEC		0x00000200ull /* dedicated interrupt vector */
-#define MIPS_CPU_VCE		0x00000400ull /* virt. coherence conflict possible */
-#define MIPS_CPU_CACHE_CDEX_P	0x00000800ull /* Create_Dirty_Exclusive CACHE op */
-#define MIPS_CPU_CACHE_CDEX_S	0x00001000ull /* ... same for seconary cache ... */
-#define MIPS_CPU_MCHECK		0x00002000ull /* Machine check exception */
-#define MIPS_CPU_EJTAG		0x00004000ull /* EJTAG exception */
-#define MIPS_CPU_NOFPUEX	0x00008000ull /* no FPU exception */
-#define MIPS_CPU_LLSC		0x00010000ull /* CPU has ll/sc instructions */
-#define MIPS_CPU_INCLUSIVE_CACHES	0x00020000ull /* P-cache subset enforced */
-#define MIPS_CPU_PREFETCH	0x00040000ull /* CPU has usable prefetch */
-#define MIPS_CPU_VINT		0x00080000ull /* CPU supports MIPSR2 vectored interrupts */
-#define MIPS_CPU_VEIC		0x00100000ull /* CPU supports MIPSR2 external interrupt controller mode */
-#define MIPS_CPU_ULRI		0x00200000ull /* CPU has ULRI feature */
-#define MIPS_CPU_PCI		0x00400000ull /* CPU has Perf Ctr Int indicator */
-#define MIPS_CPU_RIXI		0x00800000ull /* CPU has TLB Read/eXec Inhibit */
-#define MIPS_CPU_MICROMIPS	0x01000000ull /* CPU has microMIPS capability */
-#define MIPS_CPU_TLBINV		0x02000000ull /* CPU supports TLBINV/F */
-#define MIPS_CPU_SEGMENTS	0x04000000ull /* CPU supports Segmentation Control registers */
-#define MIPS_CPU_EVA		0x80000000ull /* CPU supports Enhanced Virtual Addressing */
-#define MIPS_CPU_HTW		0x100000000ull /* CPU support Hardware Page Table Walker */
-#define MIPS_CPU_RIXIEX		0x200000000ull /* CPU has unique exception codes for {Read, Execute}-Inhibit exceptions */
-#define MIPS_CPU_MAAR		0x400000000ull /* MAAR(I) registers are present */
-#define MIPS_CPU_FRE		0x800000000ull /* FRE & UFE bits implemented */
-#define MIPS_CPU_RW_LLB		0x1000000000ull /* LLADDR/LLB writes are allowed */
-#define MIPS_CPU_XPA		0x2000000000ull /* CPU supports Extended Physical Addressing */
-#define MIPS_CPU_CDMM		0x4000000000ull	/* CPU has Common Device Memory Map */
-#define MIPS_CPU_BP_GHIST	0x8000000000ull /* R12K+ Branch Prediction Global History */
-#define MIPS_CPU_SP		0x10000000000ull /* Small (1KB) page support */
-#define MIPS_CPU_FTLB		0x20000000000ull /* CPU has Fixed-page-size TLB */
-#define MIPS_CPU_NAN_LEGACY	0x40000000000ull /* Legacy NaN implemented */
-#define MIPS_CPU_NAN_2008	0x80000000000ull /* 2008 NaN implemented */
+#define MIPS_CPU_TLB		MBIT_ULL( 0)	/* CPU has TLB */
+#define MIPS_CPU_4KEX		MBIT_ULL( 1)	/* "R4K" exception model */
+#define MIPS_CPU_3K_CACHE	MBIT_ULL( 2)	/* R3000-style caches */
+#define MIPS_CPU_4K_CACHE	MBIT_ULL( 3)	/* R4000-style caches */
+#define MIPS_CPU_TX39_CACHE	MBIT_ULL( 4)	/* TX3900-style caches */
+#define MIPS_CPU_FPU		MBIT_ULL( 5)	/* CPU has FPU */
+#define MIPS_CPU_32FPR		MBIT_ULL( 6)	/* 32 dbl. prec. FP registers */
+#define MIPS_CPU_COUNTER	MBIT_ULL( 7)	/* Cycle count/compare */
+#define MIPS_CPU_WATCH		MBIT_ULL( 8)	/* watchpoint registers */
+#define MIPS_CPU_DIVEC		MBIT_ULL( 9)	/* dedicated interrupt vector */
+#define MIPS_CPU_VCE		MBIT_ULL(10)	/* virt. coherence conflict possible */
+#define MIPS_CPU_CACHE_CDEX_P	MBIT_ULL(11)	/* Create_Dirty_Exclusive CACHE op */
+#define MIPS_CPU_CACHE_CDEX_S	MBIT_ULL(12)	/* ... same for secondary cache ... */
+#define MIPS_CPU_MCHECK		MBIT_ULL(13)	/* Machine check exception */
+#define MIPS_CPU_EJTAG		MBIT_ULL(14)	/* EJTAG exception */
+#define MIPS_CPU_NOFPUEX	MBIT_ULL(15)	/* no FPU exception */
+#define MIPS_CPU_LLSC		MBIT_ULL(16)	/* CPU has ll/sc instructions */
+#define MIPS_CPU_INCLUSIVE_CACHES	MBIT_ULL(17)	/* P-cache subset enforced */
+#define MIPS_CPU_PREFETCH	MBIT_ULL(18)	/* CPU has usable prefetch */
+#define MIPS_CPU_VINT		MBIT_ULL(19)	/* CPU supports MIPSR2 vectored interrupts */
+#define MIPS_CPU_VEIC		MBIT_ULL(20)	/* CPU supports MIPSR2 external interrupt controller mode */
+#define MIPS_CPU_ULRI		MBIT_ULL(21)	/* CPU has ULRI feature */
+#define MIPS_CPU_PCI		MBIT_ULL(22)	/* CPU has Perf Ctr Int indicator */
+#define MIPS_CPU_RIXI		MBIT_ULL(23)	/* CPU has TLB Read/eXec Inhibit */
+#define MIPS_CPU_MICROMIPS	MBIT_ULL(24)	/* CPU has microMIPS capability */
+#define MIPS_CPU_TLBINV		MBIT_ULL(25)	/* CPU supports TLBINV/F */
+#define MIPS_CPU_SEGMENTS	MBIT_ULL(26)	/* CPU supports Segmentation Control registers */
+#define MIPS_CPU_EVA		MBIT_ULL(27)	/* CPU supports Enhanced Virtual Addressing */
+#define MIPS_CPU_HTW		MBIT_ULL(28)	/* CPU supports Hardware Page Table Walker */
+#define MIPS_CPU_RIXIEX		MBIT_ULL(29)	/* CPU has unique exception codes for {Read, Execute}-Inhibit exceptions */
+#define MIPS_CPU_MAAR		MBIT_ULL(30)	/* MAAR(I) registers are present */
+#define MIPS_CPU_FRE		MBIT_ULL(31)	/* FRE & UFE bits implemented */
+#define MIPS_CPU_RW_LLB		MBIT_ULL(32)	/* LLADDR/LLB writes are allowed */
+#define MIPS_CPU_LPA		MBIT_ULL(33)	/* CPU supports Large Physical Addressing */
+#define MIPS_CPU_CDMM		MBIT_ULL(34)	/* CPU has Common Device Memory Map */
+#define MIPS_CPU_BP_GHIST	MBIT_ULL(35)	/* R12K+ Branch Prediction Global History */
+#define MIPS_CPU_SP		MBIT_ULL(36)	/* Small (1KB) page support */
+#define MIPS_CPU_FTLB		MBIT_ULL(37)	/* CPU has Fixed-page-size TLB */
+#define MIPS_CPU_NAN_LEGACY	MBIT_ULL(38)	/* Legacy NaN implemented */
+#define MIPS_CPU_NAN_2008	MBIT_ULL(39)	/* 2008 NaN implemented */
+#define MIPS_CPU_VP		MBIT_ULL(40)	/* MIPSr6 Virtual Processors (multi-threading) */
+#define MIPS_CPU_LDPTE		MBIT_ULL(41)	/* CPU has ldpte/lddir instructions */
+#define MIPS_CPU_MVH		MBIT_ULL(42)	/* CPU supports MFHC0/MTHC0 */
+#define MIPS_CPU_EBASE_WG	MBIT_ULL(43)	/* CPU has EBase.WG */
+#define MIPS_CPU_BADINSTR	MBIT_ULL(44)	/* CPU has BadInstr register */
+#define MIPS_CPU_BADINSTRP	MBIT_ULL(45)	/* CPU has BadInstrP register */
+#define MIPS_CPU_CTXTC		MBIT_ULL(46)	/* CPU has [X]ContextConfig registers */
+#define MIPS_CPU_PERF		MBIT_ULL(47)	/* CPU has MIPS performance counters */
+#define MIPS_CPU_GUESTCTL0EXT	MBIT_ULL(48)	/* CPU has VZ GuestCtl0Ext register */
+#define MIPS_CPU_GUESTCTL1	MBIT_ULL(49)	/* CPU has VZ GuestCtl1 register */
+#define MIPS_CPU_GUESTCTL2	MBIT_ULL(50)	/* CPU has VZ GuestCtl2 register */
+#define MIPS_CPU_GUESTID	MBIT_ULL(51)	/* CPU uses VZ ASE GuestID feature */
+#define MIPS_CPU_DRG		MBIT_ULL(52)	/* CPU has VZ Direct Root to Guest (DRG) */
 
 /*
  * CPU ASE encodings
@@ -401,5 +427,6 @@
 #define MIPS_ASE_DSP2P		0x00000040 /* Signal Processing ASE Rev 2 */
 #define MIPS_ASE_VZ		0x00000080 /* Virtualization ASE */
 #define MIPS_ASE_MSA		0x00000100 /* MIPS SIMD Architecture */
+#define MIPS_ASE_DSP3		0x00000200 /* Signal Processing ASE Rev 3 */
 
 #endif /* _ASM_CPU_H */
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index e090fc3..f5f4571 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -111,6 +111,11 @@
 #define R_MIPS_CALLHI16		30
 #define R_MIPS_CALLLO16		31
 /*
+ * Introduced for MIPSr6.
+ */
+#define R_MIPS_PC21_S2		60
+#define R_MIPS_PC26_S2		61
+/*
  * This range is reserved for vendor specific relocations.
  */
 #define R_MIPS_LOVENDOR		100
@@ -170,16 +175,14 @@
 #define SHF_MIPS_NAMES		0x02000000
 #define SHF_MIPS_NODUPES	0x01000000
 
-#ifndef ELF_ARCH
-/* ELF register definitions */
-#define ELF_NGREG	45
-#define ELF_NFPREG	33
-
-typedef unsigned long elf_greg_t;
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef double elf_fpreg_t;
-typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+#define MIPS_ABI_FP_ANY		0	/* FP ABI doesn't matter */
+#define MIPS_ABI_FP_DOUBLE	1	/* -mdouble-float */
+#define MIPS_ABI_FP_SINGLE	2	/* -msingle-float */
+#define MIPS_ABI_FP_SOFT	3	/* -msoft-float */
+#define MIPS_ABI_FP_OLD_64	4	/* -mips32r2 -mfp64 */
+#define MIPS_ABI_FP_XX		5	/* -mfpxx */
+#define MIPS_ABI_FP_64		6	/* -mips32r2 -mfp64 */
+#define MIPS_ABI_FP_64A		7	/* -mips32r2 -mfp64 -mno-odd-spreg */
 
 struct mips_elf_abiflags_v0 {
 	uint16_t version;	/* Version of flags structure */
@@ -196,51 +199,22 @@
 	uint32_t flags2;
 };
 
-#define MIPS_ABI_FP_ANY		0	/* FP ABI doesn't matter */
-#define MIPS_ABI_FP_DOUBLE	1	/* -mdouble-float */
-#define MIPS_ABI_FP_SINGLE	2	/* -msingle-float */
-#define MIPS_ABI_FP_SOFT	3	/* -msoft-float */
-#define MIPS_ABI_FP_OLD_64	4	/* -mips32r2 -mfp64 */
-#define MIPS_ABI_FP_XX		5	/* -mfpxx */
-#define MIPS_ABI_FP_64		6	/* -mips32r2 -mfp64 */
-#define MIPS_ABI_FP_64A		7	/* -mips32r2 -mfp64 -mno-odd-spreg */
+#ifndef ELF_ARCH
+/* ELF register definitions */
+#define ELF_NGREG	45
+#define ELF_NFPREG	33
+
+typedef unsigned long elf_greg_t;
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef double elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 
 #ifdef CONFIG_32BIT
-
-/*
- * In order to be sure that we don't attempt to execute an O32 binary which
- * requires 64 bit FP (FR=1) on a system which does not support it we refuse
- * to execute any binary which has bits specified by the following macro set
- * in its ELF header flags.
- */
-#ifdef CONFIG_MIPS_O32_FP64_SUPPORT
-# define __MIPS_O32_FP64_MUST_BE_ZERO	0
-#else
-# define __MIPS_O32_FP64_MUST_BE_ZERO	EF_MIPS_FP64
-#endif
-
 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(hdr)						\
-({									\
-	int __res = 1;							\
-	struct elfhdr *__h = (hdr);					\
-									\
-	if (!mips_elf_check_machine(__h))				\
-		__res = 0;						\
-	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
-		__res = 0;						\
-	if ((__h->e_flags & EF_MIPS_ABI2) != 0)				\
-		__res = 0;						\
-	if (((__h->e_flags & EF_MIPS_ABI) != 0) &&			\
-	    ((__h->e_flags & EF_MIPS_ABI) != EF_MIPS_ABI_O32))		\
-		__res = 0;						\
-	if (__h->e_flags & __MIPS_O32_FP64_MUST_BE_ZERO)		\
-		__res = 0;						\
-									\
-	__res;								\
-})
+#define elf_check_arch elfo32_check_arch
 
 /*
  * These are used to set parameters in the core dumps.
@@ -253,18 +227,7 @@
 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(hdr)						\
-({									\
-	int __res = 1;							\
-	struct elfhdr *__h = (hdr);					\
-									\
-	if (!mips_elf_check_machine(__h))				\
-		__res = 0;						\
-	if (__h->e_ident[EI_CLASS] != ELFCLASS64)			\
-		__res = 0;						\
-									\
-	__res;								\
-})
+#define elf_check_arch elfn64_check_arch
 
 /*
  * These are used to set parameters in the core dumps.
@@ -285,11 +248,81 @@
 
 #endif /* !defined(ELF_ARCH) */
 
+/*
+ * In order to be sure that we don't attempt to execute an O32 binary which
+ * requires 64 bit FP (FR=1) on a system which does not support it we refuse
+ * to execute any binary which has bits specified by the following macro set
+ * in its ELF header flags.
+ */
+#ifdef CONFIG_MIPS_O32_FP64_SUPPORT
+# define __MIPS_O32_FP64_MUST_BE_ZERO	0
+#else
+# define __MIPS_O32_FP64_MUST_BE_ZERO	EF_MIPS_FP64
+#endif
+
 #define mips_elf_check_machine(x) ((x)->e_machine == EM_MIPS)
 
 #define vmcore_elf32_check_arch mips_elf_check_machine
 #define vmcore_elf64_check_arch mips_elf_check_machine
 
+/*
+ * Return non-zero if HDR identifies an o32 ELF binary.
+ */
+#define elfo32_check_arch(hdr)						\
+({									\
+	int __res = 1;							\
+	struct elfhdr *__h = (hdr);					\
+									\
+	if (!mips_elf_check_machine(__h))				\
+		__res = 0;						\
+	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
+		__res = 0;						\
+	if ((__h->e_flags & EF_MIPS_ABI2) != 0)				\
+		__res = 0;						\
+	if (((__h->e_flags & EF_MIPS_ABI) != 0) &&			\
+	    ((__h->e_flags & EF_MIPS_ABI) != EF_MIPS_ABI_O32))		\
+		__res = 0;						\
+	if (__h->e_flags & __MIPS_O32_FP64_MUST_BE_ZERO)		\
+		__res = 0;						\
+									\
+	__res;								\
+})
+
+/*
+ * Return non-zero if HDR identifies an n64 ELF binary.
+ */
+#define elfn64_check_arch(hdr)						\
+({									\
+	int __res = 1;							\
+	struct elfhdr *__h = (hdr);					\
+									\
+	if (!mips_elf_check_machine(__h))				\
+		__res = 0;						\
+	if (__h->e_ident[EI_CLASS] != ELFCLASS64)			\
+		__res = 0;						\
+									\
+	__res;								\
+})
+
+/*
+ * Return non-zero if HDR identifies an n32 ELF binary.
+ */
+#define elfn32_check_arch(hdr)						\
+({									\
+	int __res = 1;							\
+	struct elfhdr *__h = (hdr);					\
+									\
+	if (!mips_elf_check_machine(__h))				\
+		__res = 0;						\
+	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
+		__res = 0;						\
+	if (((__h->e_flags & EF_MIPS_ABI2) == 0) ||			\
+	    ((__h->e_flags & EF_MIPS_ABI) != 0))			\
+		__res = 0;						\
+									\
+	__res;								\
+})
+
 struct mips_abi;
 
 extern struct mips_abi mips_abi;
@@ -300,17 +333,16 @@
 
 #define SET_PERSONALITY2(ex, state)					\
 do {									\
-	if (personality(current->personality) != PER_LINUX)		\
-		set_personality(PER_LINUX);				\
-									\
 	clear_thread_flag(TIF_HYBRID_FPREGS);				\
 	set_thread_flag(TIF_32BIT_FPREGS);				\
 									\
-	mips_set_personality_fp(state);					\
-									\
 	current->thread.abi = &mips_abi;				\
 									\
+	mips_set_personality_fp(state);					\
 	mips_set_personality_nan(state);				\
+									\
+	if (personality(current->personality) != PER_LINUX)		\
+		set_personality(PER_LINUX);				\
 } while (0)
 
 #endif /* CONFIG_32BIT */
@@ -321,6 +353,7 @@
 #define __SET_PERSONALITY32_N32()					\
 	do {								\
 		set_thread_flag(TIF_32BIT_ADDR);			\
+									\
 		current->thread.abi = &mips_abi_n32;			\
 	} while (0)
 #else
@@ -336,9 +369,9 @@
 		clear_thread_flag(TIF_HYBRID_FPREGS);			\
 		set_thread_flag(TIF_32BIT_FPREGS);			\
 									\
-		mips_set_personality_fp(state);				\
-									\
 		current->thread.abi = &mips_abi_32;			\
+									\
+		mips_set_personality_fp(state);				\
 	} while (0)
 #else
 #define __SET_PERSONALITY32_O32(ex, state)				\
diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
index 7b99efd..dbb1eb6 100644
--- a/arch/mips/include/asm/hazards.h
+++ b/arch/mips/include/asm/hazards.h
@@ -22,7 +22,8 @@
 /*
  * TLB hazards
  */
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) && !defined(CONFIG_CPU_CAVIUM_OCTEON)
+#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \
+	!defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT)
 
 /*
  * MIPSR2 defines ehb for hazard avoidance
@@ -155,8 +156,8 @@
 } while (0)
 
 #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \
-	defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_CPU_R10000) || \
-	defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR)
+	defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \
+	defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR)
 
 /*
  * R10000 rocks - all hazards handled in hardware, so this becomes a nobrainer.
diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h
index 01880b3..64f2500 100644
--- a/arch/mips/include/asm/highmem.h
+++ b/arch/mips/include/asm/highmem.h
@@ -19,8 +19,10 @@
 
 #ifdef __KERNEL__
 
+#include <linux/bug.h>
 #include <linux/interrupt.h>
 #include <linux/uaccess.h>
+#include <asm/cpu-features.h>
 #include <asm/kmap_types.h>
 
 /* undef for production */
@@ -50,7 +52,7 @@
 extern void __kunmap_atomic(void *kvaddr);
 extern void *kmap_atomic_pfn(unsigned long pfn);
 
-#define flush_cache_kmaps()	flush_cache_all()
+#define flush_cache_kmaps()	BUG_ON(cpu_has_dc_aliases)
 
 extern void kmap_init(void);
 
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 2b4dc7a..ecabc00 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -304,10 +304,10 @@
 #undef __IS_KSEG1
 }
 
-#ifdef CONFIG_CPU_CAVIUM_OCTEON
-#define war_octeon_io_reorder_wmb()		wmb()
+#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_LOONGSON3_ENHANCEMENT)
+#define war_io_reorder_wmb()		wmb()
 #else
-#define war_octeon_io_reorder_wmb()		do { } while (0)
+#define war_io_reorder_wmb()		do { } while (0)
 #endif
 
 #define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq)			\
@@ -318,7 +318,7 @@
 	volatile type *__mem;						\
 	type __val;							\
 									\
-	war_octeon_io_reorder_wmb();					\
+	war_io_reorder_wmb();					\
 									\
 	__mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem));	\
 									\
@@ -387,7 +387,7 @@
 	volatile type *__addr;						\
 	type __val;							\
 									\
-	war_octeon_io_reorder_wmb();					\
+	war_io_reorder_wmb();					\
 									\
 	__addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \
 									\
diff --git a/arch/mips/include/asm/irq_regs.h b/arch/mips/include/asm/irq_regs.h
index 33bd2a0..8c48d6d 100644
--- a/arch/mips/include/asm/irq_regs.h
+++ b/arch/mips/include/asm/irq_regs.h
@@ -18,4 +18,14 @@
 	return current_thread_info()->regs;
 }
 
+static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
+{
+	struct pt_regs *old_regs;
+
+	old_regs = get_irq_regs();
+	current_thread_info()->regs = new_regs;
+
+	return old_regs;
+}
+
 #endif /* __ASM_IRQ_REGS_H */
diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
index 65c351e..9d3610b 100644
--- a/arch/mips/include/asm/irqflags.h
+++ b/arch/mips/include/asm/irqflags.h
@@ -41,7 +41,12 @@
 	"	.set	push						\n"
 	"	.set	reorder						\n"
 	"	.set	noat						\n"
+#if defined(CONFIG_CPU_LOONGSON3)
+	"	mfc0	%[flags], $12					\n"
+	"	di							\n"
+#else
 	"	di	%[flags]					\n"
+#endif
 	"	andi	%[flags], 1					\n"
 	"	" __stringify(__irq_disable_hazard) "			\n"
 	"	.set	pop						\n"
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index f6b1279..6733ac5 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -122,6 +122,7 @@
 	u32 flush_dcache_exits;
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 };
 
@@ -311,17 +312,18 @@
 #define MIPS3_PG_FRAME		0x3fffffc0
 
 #define VPN2_MASK		0xffffe000
+#define KVM_ENTRYHI_ASID	MIPS_ENTRYHI_ASID
 #define TLB_IS_GLOBAL(x)	(((x).tlb_lo0 & MIPS3_PG_G) &&		\
 				 ((x).tlb_lo1 & MIPS3_PG_G))
 #define TLB_VPN2(x)		((x).tlb_hi & VPN2_MASK)
-#define TLB_ASID(x)		((x).tlb_hi & ASID_MASK)
+#define TLB_ASID(x)		((x).tlb_hi & KVM_ENTRYHI_ASID)
 #define TLB_IS_VALID(x, va)	(((va) & (1 << PAGE_SHIFT))		\
 				 ? ((x).tlb_lo1 & MIPS3_PG_V)		\
 				 : ((x).tlb_lo0 & MIPS3_PG_V))
 #define TLB_HI_VPN2_HIT(x, y)	((TLB_VPN2(x) & ~(x).tlb_mask) ==	\
 				 ((y) & VPN2_MASK & ~(x).tlb_mask))
 #define TLB_HI_ASID_HIT(x, y)	(TLB_IS_GLOBAL(x) ||			\
-				 TLB_ASID(x) == ((y) & ASID_MASK))
+				 TLB_ASID(x) == ((y) & KVM_ENTRYHI_ASID))
 
 struct kvm_mips_tlb {
 	long tlb_mask;
@@ -747,7 +749,7 @@
 
 uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
 void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare);
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack);
 void kvm_mips_init_count(struct kvm_vcpu *vcpu);
 int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
 int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
@@ -812,5 +814,6 @@
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h
new file mode 100644
index 0000000..c6d17d1
--- /dev/null
+++ b/arch/mips/include/asm/llsc.h
@@ -0,0 +1,28 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Macros for 32/64-bit neutral inline assembler
+ */
+
+#ifndef __ASM_LLSC_H
+#define __ASM_LLSC_H
+
+#if _MIPS_SZLONG == 32
+#define SZLONG_LOG 5
+#define SZLONG_MASK 31UL
+#define __LL		"ll	"
+#define __SC		"sc	"
+#define __INS		"ins	"
+#define __EXT		"ext	"
+#elif _MIPS_SZLONG == 64
+#define SZLONG_LOG 6
+#define SZLONG_MASK 63UL
+#define __LL		"lld	"
+#define __SC		"scd	"
+#define __INS		"dins	"
+#define __EXT		"dext	"
+#endif
+
+#endif /* __ASM_LLSC_H  */
diff --git a/arch/mips/include/asm/mach-bmips/cpu-feature-overrides.h b/arch/mips/include/asm/mach-bmips/cpu-feature-overrides.h
new file mode 100644
index 0000000..fa0583e
--- /dev/null
+++ b/arch/mips/include/asm/mach-bmips/cpu-feature-overrides.h
@@ -0,0 +1,14 @@
+#ifndef __ASM_MACH_BMIPS_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_BMIPS_CPU_FEATURE_OVERRIDES_H
+
+/* Invariants across all BMIPS processors */
+#define cpu_has_vtag_icache		0
+#define cpu_icache_snoops_remote_store	1
+
+/* Processor ISA compatibility is MIPS32R1 */
+#define cpu_has_mips32r1		1
+#define cpu_has_mips32r2		0
+#define cpu_has_mips64r1		0
+#define cpu_has_mips64r2		0
+
+#endif /* __ASM_MACH_BMIPS_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-bmips/ioremap.h b/arch/mips/include/asm/mach-bmips/ioremap.h
new file mode 100644
index 0000000..29c7a7b
--- /dev/null
+++ b/arch/mips/include/asm/mach-bmips/ioremap.h
@@ -0,0 +1,33 @@
+#ifndef __ASM_MACH_BMIPS_IOREMAP_H
+#define __ASM_MACH_BMIPS_IOREMAP_H
+
+#include <linux/types.h>
+
+static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
+{
+	return phys_addr;
+}
+
+static inline int is_bmips_internal_registers(phys_addr_t offset)
+{
+	if (offset >= 0xfff80000)
+		return 1;
+
+	return 0;
+}
+
+static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size,
+					 unsigned long flags)
+{
+	if (is_bmips_internal_registers(offset))
+		return (void __iomem *)offset;
+
+	return NULL;
+}
+
+static inline int plat_iounmap(const volatile void __iomem *addr)
+{
+	return is_bmips_internal_registers((unsigned long)addr);
+}
+
+#endif /* __ASM_MACH_BMIPS_IOREMAP_H */
diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h
index 32cfbe6..073b8bf 100644
--- a/arch/mips/include/asm/mach-jz4740/platform.h
+++ b/arch/mips/include/asm/mach-jz4740/platform.h
@@ -19,7 +19,6 @@
 
 #include <linux/platform_device.h>
 
-extern struct platform_device jz4740_usb_ohci_device;
 extern struct platform_device jz4740_udc_device;
 extern struct platform_device jz4740_udc_xceiv_device;
 extern struct platform_device jz4740_mmc_device;
diff --git a/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h
index 98d6a2f..7023883 100644
--- a/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h
+++ b/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h
@@ -3,7 +3,7 @@
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
  *
- * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef _LTQ_FALCON_H__
diff --git a/arch/mips/include/asm/mach-lantiq/lantiq.h b/arch/mips/include/asm/mach-lantiq/lantiq.h
index 4e5ae65..8064d7a 100644
--- a/arch/mips/include/asm/mach-lantiq/lantiq.h
+++ b/arch/mips/include/asm/mach-lantiq/lantiq.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 #ifndef _LANTIQ_H__
 #define _LANTIQ_H__
diff --git a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
index e23bf7c..17d2fdc 100644
--- a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
+++ b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef _LANTIQ_PLATFORM_H__
diff --git a/arch/mips/include/asm/mach-lantiq/xway/irq.h b/arch/mips/include/asm/mach-lantiq/xway/irq.h
index a1471d2..83e5f03 100644
--- a/arch/mips/include/asm/mach-lantiq/xway/irq.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/irq.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef __LANTIQ_IRQ_H
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
index 5eadfe5..1410763 100644
--- a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef _LANTIQ_XWAY_IRQ_H__
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
index dd6005b..f873107 100644
--- a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef _LTQ_XWAY_H__
diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
index 5f8693d..4901833 100644
--- a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
@@ -12,7 +12,7 @@
  *   along with this program; if not, write to the Free Software
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  *
- *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ *   Copyright (C) 2011 John Crispin <john@phrozen.org>
  */
 
 #ifndef LTQ_DMA_H__
diff --git a/arch/mips/include/asm/mach-loongson32/cpufreq.h b/arch/mips/include/asm/mach-loongson32/cpufreq.h
index 6843fa1..2f1ecb0 100644
--- a/arch/mips/include/asm/mach-loongson32/cpufreq.h
+++ b/arch/mips/include/asm/mach-loongson32/cpufreq.h
@@ -9,7 +9,6 @@
  * option) any later version.
  */
 
-
 #ifndef __ASM_MACH_LOONGSON32_CPUFREQ_H
 #define __ASM_MACH_LOONGSON32_CPUFREQ_H
 
diff --git a/arch/mips/include/asm/mach-loongson32/dma.h b/arch/mips/include/asm/mach-loongson32/dma.h
new file mode 100644
index 0000000..ad1dec7
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson32/dma.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com>
+ *
+ * Loongson 1 DMA platform support.
+ *
+ * This program is free software; you can redistribute	it and/or modify it
+ * under  the terms of	the GNU General	 Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_MACH_LOONGSON32_DMA_H
+#define __ASM_MACH_LOONGSON32_DMA_H
+
+#define LS1X_DMA_CHANNEL0	0
+#define LS1X_DMA_CHANNEL1	1
+#define LS1X_DMA_CHANNEL2	2
+
+struct plat_ls1x_dma {
+	int nr_channels;
+};
+
+extern struct plat_ls1x_dma ls1b_dma_pdata;
+
+#endif /* __ASM_MACH_LOONGSON32_DMA_H */
diff --git a/arch/mips/include/asm/mach-loongson32/irq.h b/arch/mips/include/asm/mach-loongson32/irq.h
index 0d35b99..c1c7441 100644
--- a/arch/mips/include/asm/mach-loongson32/irq.h
+++ b/arch/mips/include/asm/mach-loongson32/irq.h
@@ -9,7 +9,6 @@
  * option) any later version.
  */
 
-
 #ifndef __ASM_MACH_LOONGSON32_IRQ_H
 #define __ASM_MACH_LOONGSON32_IRQ_H
 
diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h
index 12aa129..978f6df 100644
--- a/arch/mips/include/asm/mach-loongson32/loongson1.h
+++ b/arch/mips/include/asm/mach-loongson32/loongson1.h
@@ -9,7 +9,6 @@
  * option) any later version.
  */
 
-
 #ifndef __ASM_MACH_LOONGSON32_LOONGSON1_H
 #define __ASM_MACH_LOONGSON32_LOONGSON1_H
 
@@ -18,6 +17,9 @@
 /* Loongson 1 Register Bases */
 #define LS1X_MUX_BASE			0x1fd00420
 #define LS1X_INTC_BASE			0x1fd01040
+#define LS1X_GPIO0_BASE			0x1fd010c0
+#define LS1X_GPIO1_BASE			0x1fd010c4
+#define LS1X_DMAC_BASE			0x1fd01160
 #define LS1X_EHCI_BASE			0x1fe00000
 #define LS1X_OHCI_BASE			0x1fe08000
 #define LS1X_GMAC0_BASE			0x1fe10000
diff --git a/arch/mips/include/asm/mach-loongson32/nand.h b/arch/mips/include/asm/mach-loongson32/nand.h
new file mode 100644
index 0000000..e274912
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson32/nand.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com>
+ *
+ * Loongson 1 NAND platform support.
+ *
+ * This program is free software; you can redistribute	it and/or modify it
+ * under  the terms of	the GNU General	 Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_MACH_LOONGSON32_NAND_H
+#define __ASM_MACH_LOONGSON32_NAND_H
+
+#include <linux/dmaengine.h>
+#include <linux/mtd/partitions.h>
+
+struct plat_ls1x_nand {
+	struct mtd_partition *parts;
+	unsigned int nr_parts;
+
+	int hold_cycle;
+	int wait_cycle;
+};
+
+extern struct plat_ls1x_nand ls1b_nand_pdata;
+
+bool ls1x_dma_filter_fn(struct dma_chan *chan, void *param);
+
+#endif /* __ASM_MACH_LOONGSON32_NAND_H */
diff --git a/arch/mips/include/asm/mach-loongson32/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h
index c32f03f..672531a 100644
--- a/arch/mips/include/asm/mach-loongson32/platform.h
+++ b/arch/mips/include/asm/mach-loongson32/platform.h
@@ -7,20 +7,28 @@
  * option) any later version.
  */
 
-
 #ifndef __ASM_MACH_LOONGSON32_PLATFORM_H
 #define __ASM_MACH_LOONGSON32_PLATFORM_H
 
 #include <linux/platform_device.h>
 
+#include <dma.h>
+#include <nand.h>
+
 extern struct platform_device ls1x_uart_pdev;
 extern struct platform_device ls1x_cpufreq_pdev;
+extern struct platform_device ls1x_dma_pdev;
 extern struct platform_device ls1x_eth0_pdev;
 extern struct platform_device ls1x_eth1_pdev;
 extern struct platform_device ls1x_ehci_pdev;
+extern struct platform_device ls1x_gpio0_pdev;
+extern struct platform_device ls1x_gpio1_pdev;
+extern struct platform_device ls1x_nand_pdev;
 extern struct platform_device ls1x_rtc_pdev;
 
-extern void __init ls1x_clk_init(void);
-extern void __init ls1x_serial_setup(struct platform_device *pdev);
+void __init ls1x_clk_init(void);
+void __init ls1x_dma_set_platdata(struct plat_ls1x_dma *pdata);
+void __init ls1x_nand_set_platdata(struct plat_ls1x_nand *pdata);
+void __init ls1x_serial_set_uartclk(struct platform_device *pdev);
 
 #endif /* __ASM_MACH_LOONGSON32_PLATFORM_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-clk.h b/arch/mips/include/asm/mach-loongson32/regs-clk.h
index 1f5a715..4d56fc3 100644
--- a/arch/mips/include/asm/mach-loongson32/regs-clk.h
+++ b/arch/mips/include/asm/mach-loongson32/regs-clk.h
@@ -19,18 +19,18 @@
 #define LS1X_CLK_PLL_DIV		LS1X_CLK_REG(0x4)
 
 /* Clock PLL Divisor Register Bits */
-#define DIV_DC_EN			(0x1 << 31)
-#define DIV_DC_RST			(0x1 << 30)
-#define DIV_CPU_EN			(0x1 << 25)
-#define DIV_CPU_RST			(0x1 << 24)
-#define DIV_DDR_EN			(0x1 << 19)
-#define DIV_DDR_RST			(0x1 << 18)
-#define RST_DC_EN			(0x1 << 5)
-#define RST_DC				(0x1 << 4)
-#define RST_DDR_EN			(0x1 << 3)
-#define RST_DDR				(0x1 << 2)
-#define RST_CPU_EN			(0x1 << 1)
-#define RST_CPU				0x1
+#define DIV_DC_EN			BIT(31)
+#define DIV_DC_RST			BIT(30)
+#define DIV_CPU_EN			BIT(25)
+#define DIV_CPU_RST			BIT(24)
+#define DIV_DDR_EN			BIT(19)
+#define DIV_DDR_RST			BIT(18)
+#define RST_DC_EN			BIT(5)
+#define RST_DC				BIT(4)
+#define RST_DDR_EN			BIT(3)
+#define RST_DDR				BIT(2)
+#define RST_CPU_EN			BIT(1)
+#define RST_CPU				BIT(0)
 
 #define DIV_DC_SHIFT			26
 #define DIV_CPU_SHIFT			20
diff --git a/arch/mips/include/asm/mach-loongson32/regs-mux.h b/arch/mips/include/asm/mach-loongson32/regs-mux.h
index 8302d92..7c394f9 100644
--- a/arch/mips/include/asm/mach-loongson32/regs-mux.h
+++ b/arch/mips/include/asm/mach-loongson32/regs-mux.h
@@ -19,49 +19,49 @@
 #define LS1X_MUX_CTRL1			LS1X_MUX_REG(0x4)
 
 /* MUX CTRL0 Register Bits */
-#define UART0_USE_PWM23			(0x1 << 28)
-#define UART0_USE_PWM01			(0x1 << 27)
-#define UART1_USE_LCD0_5_6_11		(0x1 << 26)
-#define I2C2_USE_CAN1			(0x1 << 25)
-#define I2C1_USE_CAN0			(0x1 << 24)
-#define NAND3_USE_UART5			(0x1 << 23)
-#define NAND3_USE_UART4			(0x1 << 22)
-#define NAND3_USE_UART1_DAT		(0x1 << 21)
-#define NAND3_USE_UART1_CTS		(0x1 << 20)
-#define NAND3_USE_PWM23			(0x1 << 19)
-#define NAND3_USE_PWM01			(0x1 << 18)
-#define NAND2_USE_UART5			(0x1 << 17)
-#define NAND2_USE_UART4			(0x1 << 16)
-#define NAND2_USE_UART1_DAT		(0x1 << 15)
-#define NAND2_USE_UART1_CTS		(0x1 << 14)
-#define NAND2_USE_PWM23			(0x1 << 13)
-#define NAND2_USE_PWM01			(0x1 << 12)
-#define NAND1_USE_UART5			(0x1 << 11)
-#define NAND1_USE_UART4			(0x1 << 10)
-#define NAND1_USE_UART1_DAT		(0x1 << 9)
-#define NAND1_USE_UART1_CTS		(0x1 << 8)
-#define NAND1_USE_PWM23			(0x1 << 7)
-#define NAND1_USE_PWM01			(0x1 << 6)
-#define GMAC1_USE_UART1			(0x1 << 4)
-#define GMAC1_USE_UART0			(0x1 << 3)
-#define LCD_USE_UART0_DAT		(0x1 << 2)
-#define LCD_USE_UART15			(0x1 << 1)
-#define LCD_USE_UART0			0x1
+#define UART0_USE_PWM23			BIT(28)
+#define UART0_USE_PWM01			BIT(27)
+#define UART1_USE_LCD0_5_6_11		BIT(26)
+#define I2C2_USE_CAN1			BIT(25)
+#define I2C1_USE_CAN0			BIT(24)
+#define NAND3_USE_UART5			BIT(23)
+#define NAND3_USE_UART4			BIT(22)
+#define NAND3_USE_UART1_DAT		BIT(21)
+#define NAND3_USE_UART1_CTS		BIT(20)
+#define NAND3_USE_PWM23			BIT(19)
+#define NAND3_USE_PWM01			BIT(18)
+#define NAND2_USE_UART5			BIT(17)
+#define NAND2_USE_UART4			BIT(16)
+#define NAND2_USE_UART1_DAT		BIT(15)
+#define NAND2_USE_UART1_CTS		BIT(14)
+#define NAND2_USE_PWM23			BIT(13)
+#define NAND2_USE_PWM01			BIT(12)
+#define NAND1_USE_UART5			BIT(11)
+#define NAND1_USE_UART4			BIT(10)
+#define NAND1_USE_UART1_DAT		BIT(9)
+#define NAND1_USE_UART1_CTS		BIT(8)
+#define NAND1_USE_PWM23			BIT(7)
+#define NAND1_USE_PWM01			BIT(6)
+#define GMAC1_USE_UART1			BIT(4)
+#define GMAC1_USE_UART0			BIT(3)
+#define LCD_USE_UART0_DAT		BIT(2)
+#define LCD_USE_UART15			BIT(1)
+#define LCD_USE_UART0			BIT(0)
 
 /* MUX CTRL1 Register Bits */
-#define USB_RESET			(0x1 << 31)
-#define SPI1_CS_USE_PWM01		(0x1 << 24)
-#define SPI1_USE_CAN			(0x1 << 23)
-#define DISABLE_DDR_CONFSPACE		(0x1 << 20)
-#define DDR32TO16EN			(0x1 << 16)
-#define GMAC1_SHUT			(0x1 << 13)
-#define GMAC0_SHUT			(0x1 << 12)
-#define USB_SHUT			(0x1 << 11)
-#define UART1_3_USE_CAN1		(0x1 << 5)
-#define UART1_2_USE_CAN0		(0x1 << 4)
-#define GMAC1_USE_TXCLK			(0x1 << 3)
-#define GMAC0_USE_TXCLK			(0x1 << 2)
-#define GMAC1_USE_PWM23			(0x1 << 1)
-#define GMAC0_USE_PWM01			0x1
+#define USB_RESET			BIT(31)
+#define SPI1_CS_USE_PWM01		BIT(24)
+#define SPI1_USE_CAN			BIT(23)
+#define DISABLE_DDR_CONFSPACE		BIT(20)
+#define DDR32TO16EN			BIT(16)
+#define GMAC1_SHUT			BIT(13)
+#define GMAC0_SHUT			BIT(12)
+#define USB_SHUT			BIT(11)
+#define UART1_3_USE_CAN1		BIT(5)
+#define UART1_2_USE_CAN0		BIT(4)
+#define GMAC1_USE_TXCLK			BIT(3)
+#define GMAC0_USE_TXCLK			BIT(2)
+#define GMAC1_USE_PWM23			BIT(1)
+#define GMAC0_USE_PWM01			BIT(0)
 
 #endif /* __ASM_MACH_LOONGSON32_REGS_MUX_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-pwm.h b/arch/mips/include/asm/mach-loongson32/regs-pwm.h
index 69f174e..4119600 100644
--- a/arch/mips/include/asm/mach-loongson32/regs-pwm.h
+++ b/arch/mips/include/asm/mach-loongson32/regs-pwm.h
@@ -19,11 +19,11 @@
 #define PWM_CTRL		0xc
 
 /* PWM Control Register Bits */
-#define CNT_RST			(0x1 << 7)
-#define INT_SR			(0x1 << 6)
-#define INT_EN			(0x1 << 5)
-#define PWM_SINGLE		(0x1 << 4)
-#define PWM_OE			(0x1 << 3)
-#define CNT_EN			0x1
+#define CNT_RST			BIT(7)
+#define INT_SR			BIT(6)
+#define INT_EN			BIT(5)
+#define PWM_SINGLE		BIT(4)
+#define PWM_OE			BIT(3)
+#define CNT_EN			BIT(0)
 
 #endif /* __ASM_MACH_LOONGSON32_REGS_PWM_H */
diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
index 98963c2..89328a3d 100644
--- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
@@ -16,11 +16,6 @@
 #ifndef __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H
 #define __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H
 
-#define cpu_dcache_line_size()	32
-#define cpu_icache_line_size()	32
-#define cpu_scache_line_size()	32
-
-
 #define cpu_has_32fpr		1
 #define cpu_has_3k_cache	0
 #define cpu_has_4k_cache	1
@@ -31,24 +26,17 @@
 #define cpu_has_counter		1
 #define cpu_has_dc_aliases	(PAGE_SIZE < 0x4000)
 #define cpu_has_divec		0
-#define cpu_has_dsp		0
-#define cpu_has_dsp2		0
 #define cpu_has_ejtag		0
-#define cpu_has_ic_fills_f_dc	0
 #define cpu_has_inclusive_pcaches	1
 #define cpu_has_llsc		1
 #define cpu_has_mcheck		0
 #define cpu_has_mdmx		0
 #define cpu_has_mips16		0
-#define cpu_has_mips32r2	0
 #define cpu_has_mips3d		0
-#define cpu_has_mips64r2	0
 #define cpu_has_mipsmt		0
-#define cpu_has_prefetch	0
 #define cpu_has_smartmips	0
 #define cpu_has_tlb		1
 #define cpu_has_tx39_cache	0
-#define cpu_has_userlocal	0
 #define cpu_has_vce		0
 #define cpu_has_veic		0
 #define cpu_has_vint		0
@@ -56,6 +44,10 @@
 #define cpu_has_watch		1
 #define cpu_has_local_ebase	0
 
-#define cpu_has_wsbh		IS_ENABLED(CONFIG_CPU_LOONGSON3)
+#ifdef CONFIG_CPU_LOONGSON3
+#define cpu_has_wsbh		1
+#define cpu_has_ic_fills_f_dc	1
+#define cpu_hwrena_impl_bits	0xc0000000
+#endif
 
 #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
index 3f2f84f..8393bc54 100644
--- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
@@ -23,8 +23,15 @@
 	or	t0, (0x1 << 7)
 	mtc0	t0, $16, 3
 	/* Set ELPA on LOONGSON3 pagegrain */
-	li	t0, (0x1 << 29)
+	mfc0	t0, $5, 1
+	or	t0, (0x1 << 29)
 	mtc0	t0, $5, 1
+#ifdef CONFIG_LOONGSON3_ENHANCEMENT
+	/* Enable STFill Buffer */
+	mfc0	t0, $16, 6
+	or	t0, 0x100
+	mtc0	t0, $16, 6
+#endif
 	_ehb
 	.set	pop
 #endif
@@ -42,8 +49,15 @@
 	or	t0, (0x1 << 7)
 	mtc0	t0, $16, 3
 	/* Set ELPA on LOONGSON3 pagegrain */
-	li	t0, (0x1 << 29)
+	mfc0	t0, $5, 1
+	or	t0, (0x1 << 29)
 	mtc0	t0, $5, 1
+#ifdef CONFIG_LOONGSON3_ENHANCEMENT
+	/* Enable STFill Buffer */
+	mfc0	t0, $16, 6
+	or	t0, 0x100
+	mtc0	t0, $16, 6
+#endif
 	_ehb
 	.set	pop
 #endif
diff --git a/arch/mips/include/asm/mach-ralink/mt7620.h b/arch/mips/include/asm/mach-ralink/mt7620.h
index 455d406..a73350b 100644
--- a/arch/mips/include/asm/mach-ralink/mt7620.h
+++ b/arch/mips/include/asm/mach-ralink/mt7620.h
@@ -7,7 +7,7 @@
  *
  * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #ifndef _MT7620_REGS_H_
@@ -72,6 +72,7 @@
 #define SYSCFG0_DRAM_TYPE_SDRAM		0
 #define SYSCFG0_DRAM_TYPE_DDR1		1
 #define SYSCFG0_DRAM_TYPE_DDR2		2
+#define SYSCFG0_DRAM_TYPE_UNKNOWN	3
 
 #define SYSCFG0_DRAM_TYPE_DDR2_MT7628	0
 #define SYSCFG0_DRAM_TYPE_DDR1_MT7628	1
diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h
index 610b61e..a672e06 100644
--- a/arch/mips/include/asm/mach-ralink/mt7621.h
+++ b/arch/mips/include/asm/mach-ralink/mt7621.h
@@ -3,7 +3,7 @@
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
  *
- * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2015 John Crispin <john@phrozen.org>
  */
 
 #ifndef _MT7621_REGS_H_
diff --git a/arch/mips/include/asm/mach-ralink/pinmux.h b/arch/mips/include/asm/mach-ralink/pinmux.h
index be106cb..ba8ac33 100644
--- a/arch/mips/include/asm/mach-ralink/pinmux.h
+++ b/arch/mips/include/asm/mach-ralink/pinmux.h
@@ -3,7 +3,7 @@
  *  it under the terms of the GNU General Public License version 2 as
  *  publishhed by the Free Software Foundation.
  *
- *  Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
 #ifndef _RT288X_PINMUX_H__
diff --git a/arch/mips/include/asm/mach-ralink/ralink_regs.h b/arch/mips/include/asm/mach-ralink/ralink_regs.h
index 4c9fba6..9df1a53 100644
--- a/arch/mips/include/asm/mach-ralink/ralink_regs.h
+++ b/arch/mips/include/asm/mach-ralink/ralink_regs.h
@@ -1,7 +1,7 @@
 /*
  *  Ralink SoC register definitions
  *
- *  Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2013 John Crispin <john@phrozen.org>
  *  Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
  *  Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
  *
diff --git a/arch/mips/include/asm/mach-ralink/rt288x.h b/arch/mips/include/asm/mach-ralink/rt288x.h
index 03ad716..25ae104 100644
--- a/arch/mips/include/asm/mach-ralink/rt288x.h
+++ b/arch/mips/include/asm/mach-ralink/rt288x.h
@@ -7,7 +7,7 @@
  *
  * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #ifndef _RT288X_REGS_H_
diff --git a/arch/mips/include/asm/mach-ralink/rt305x.h b/arch/mips/include/asm/mach-ralink/rt305x.h
index 2eea793..ac2d65c 100644
--- a/arch/mips/include/asm/mach-ralink/rt305x.h
+++ b/arch/mips/include/asm/mach-ralink/rt305x.h
@@ -7,7 +7,7 @@
  *
  * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #ifndef _RT305X_REGS_H_
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index d463539..9411a4c 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -208,6 +208,7 @@
 BUILD_CM_RW(sys_config2,	MIPS_CM_GCB_OFS + 0x150)
 BUILD_CM_RW(l2_pft_control,	MIPS_CM_GCB_OFS + 0x300)
 BUILD_CM_RW(l2_pft_control_b,	MIPS_CM_GCB_OFS + 0x308)
+BUILD_CM_RW(bev_base,		MIPS_CM_GCB_OFS + 0x680)
 
 /* Core Local & Core Other register accessor functions */
 BUILD_CM_Cx_RW(reset_release,	0x00)
@@ -290,8 +291,8 @@
 #define CM_GCR_GIC_BASE_GICEN_MSK		(_ULCAST_(0x1) << 0)
 
 /* GCR_CPC_BASE register fields */
-#define CM_GCR_CPC_BASE_CPCBASE_SHF		17
-#define CM_GCR_CPC_BASE_CPCBASE_MSK		(_ULCAST_(0x7fff) << 17)
+#define CM_GCR_CPC_BASE_CPCBASE_SHF		15
+#define CM_GCR_CPC_BASE_CPCBASE_MSK		(_ULCAST_(0x1ffff) << 15)
 #define CM_GCR_CPC_BASE_CPCEN_SHF		0
 #define CM_GCR_CPC_BASE_CPCEN_MSK		(_ULCAST_(0x1) << 0)
 
@@ -461,7 +462,10 @@
 	if (mips_cm_revision() >= CM_REV_CM3)
 		return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK;
 
-	return smp_num_siblings;
+	if (config_enabled(CONFIG_SMP))
+		return smp_num_siblings;
+
+	return 1;
 }
 
 /**
@@ -505,7 +509,7 @@
 
 #else /* !CONFIG_MIPS_CM */
 
-static inline void mips_cm_lock_other(unsigned int core) { }
+static inline void mips_cm_lock_other(unsigned int core, unsigned int vp) { }
 static inline void mips_cm_unlock_other(void) { }
 
 #endif /* !CONFIG_MIPS_CM */
diff --git a/arch/mips/include/asm/mips-cpc.h b/arch/mips/include/asm/mips-cpc.h
index e09035239..8c519f9 100644
--- a/arch/mips/include/asm/mips-cpc.h
+++ b/arch/mips/include/asm/mips-cpc.h
@@ -106,6 +106,9 @@
 BUILD_CPC_Cx_RW(cmd,		0x00)
 BUILD_CPC_Cx_RW(stat_conf,	0x08)
 BUILD_CPC_Cx_RW(other,		0x10)
+BUILD_CPC_Cx_RW(vp_stop,	0x20)
+BUILD_CPC_Cx_RW(vp_run,		0x28)
+BUILD_CPC_Cx_RW(vp_running,	0x30)
 
 /* CPC_Cx_CMD register fields */
 #define CPC_Cx_CMD_SHF				0
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 3ad19ad..25d0157 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -55,8 +55,14 @@
 #define CP0_BADINSTR $8, 1
 #define CP0_COUNT $9
 #define CP0_ENTRYHI $10
+#define CP0_GUESTCTL1 $10, 4
+#define CP0_GUESTCTL2 $10, 5
+#define CP0_GUESTCTL3 $10, 6
 #define CP0_COMPARE $11
+#define CP0_GUESTCTL0EXT $11, 4
 #define CP0_STATUS $12
+#define CP0_GUESTCTL0 $12, 6
+#define CP0_GTOFFSET $12, 7
 #define CP0_CAUSE $13
 #define CP0_EPC $14
 #define CP0_PRID $15
@@ -229,6 +235,8 @@
 
 /* MIPS32/64 EntryHI bit definitions */
 #define MIPS_ENTRYHI_EHINV	(_ULCAST_(1) << 10)
+#define MIPS_ENTRYHI_ASIDX	(_ULCAST_(0x3) << 8)
+#define MIPS_ENTRYHI_ASID	(_ULCAST_(0xff) << 0)
 
 /*
  * R4x00 interrupt enable / cause bits
@@ -390,6 +398,8 @@
 #define	 CAUSEF_IP7		(_ULCAST_(1)   << 15)
 #define CAUSEB_FDCI		21
 #define CAUSEF_FDCI		(_ULCAST_(1)   << 21)
+#define CAUSEB_WP		22
+#define CAUSEF_WP		(_ULCAST_(1)   << 22)
 #define CAUSEB_IV		23
 #define CAUSEF_IV		(_ULCAST_(1)   << 23)
 #define CAUSEB_PCI		26
@@ -611,7 +621,8 @@
 #define MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT (_ULCAST_(1) << 14)
 #define MIPS_CONF4_MMUEXTDEF_FTLBSIZEEXT	(_ULCAST_(2) << 14)
 #define MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT	(_ULCAST_(3) << 14)
-#define MIPS_CONF4_KSCREXIST	(_ULCAST_(255) << 16)
+#define MIPS_CONF4_KSCREXIST_SHIFT	(16)
+#define MIPS_CONF4_KSCREXIST	(_ULCAST_(255) << MIPS_CONF4_KSCREXIST_SHIFT)
 #define MIPS_CONF4_VTLBSIZEEXT_SHIFT	(24)
 #define MIPS_CONF4_VTLBSIZEEXT	(_ULCAST_(15) << MIPS_CONF4_VTLBSIZEEXT_SHIFT)
 #define MIPS_CONF4_AE		(_ULCAST_(1) << 28)
@@ -623,6 +634,7 @@
 #define MIPS_CONF5_MRP		(_ULCAST_(1) << 3)
 #define MIPS_CONF5_LLB		(_ULCAST_(1) << 4)
 #define MIPS_CONF5_MVH		(_ULCAST_(1) << 5)
+#define MIPS_CONF5_VP		(_ULCAST_(1) << 7)
 #define MIPS_CONF5_FRE		(_ULCAST_(1) << 8)
 #define MIPS_CONF5_UFE		(_ULCAST_(1) << 9)
 #define MIPS_CONF5_MSAEN	(_ULCAST_(1) << 27)
@@ -633,6 +645,8 @@
 #define MIPS_CONF6_SYND		(_ULCAST_(1) << 13)
 /* proAptiv FTLB on/off bit */
 #define MIPS_CONF6_FTLBEN	(_ULCAST_(1) << 15)
+/* Loongson-3 FTLB on/off bit */
+#define MIPS_CONF6_FTLBDIS	(_ULCAST_(1) << 22)
 /* FTLB probability bits */
 #define MIPS_CONF6_FTLBP_SHIFT	(16)
 
@@ -645,12 +659,38 @@
 /* FTLB probability bits for R6 */
 #define MIPS_CONF7_FTLBP_SHIFT	(18)
 
+/* WatchLo* register definitions */
+#define MIPS_WATCHLO_IRW	(_ULCAST_(0x7) << 0)
+
+/* WatchHi* register definitions */
+#define MIPS_WATCHHI_M		(_ULCAST_(1) << 31)
+#define MIPS_WATCHHI_G		(_ULCAST_(1) << 30)
+#define MIPS_WATCHHI_WM		(_ULCAST_(0x3) << 28)
+#define MIPS_WATCHHI_WM_R_RVA	(_ULCAST_(0) << 28)
+#define MIPS_WATCHHI_WM_R_GPA	(_ULCAST_(1) << 28)
+#define MIPS_WATCHHI_WM_G_GVA	(_ULCAST_(2) << 28)
+#define MIPS_WATCHHI_EAS	(_ULCAST_(0x3) << 24)
+#define MIPS_WATCHHI_ASID	(_ULCAST_(0xff) << 16)
+#define MIPS_WATCHHI_MASK	(_ULCAST_(0x1ff) << 3)
+#define MIPS_WATCHHI_I		(_ULCAST_(1) << 2)
+#define MIPS_WATCHHI_R		(_ULCAST_(1) << 1)
+#define MIPS_WATCHHI_W		(_ULCAST_(1) << 0)
+#define MIPS_WATCHHI_IRW	(_ULCAST_(0x7) << 0)
+
 /* MAAR bit definitions */
 #define MIPS_MAAR_ADDR		((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12)
 #define MIPS_MAAR_ADDR_SHIFT	12
 #define MIPS_MAAR_S		(_ULCAST_(1) << 1)
 #define MIPS_MAAR_V		(_ULCAST_(1) << 0)
 
+/* EBase bit definitions */
+#define MIPS_EBASE_CPUNUM_SHIFT	0
+#define MIPS_EBASE_CPUNUM	(_ULCAST_(0x3ff) << 0)
+#define MIPS_EBASE_WG_SHIFT	11
+#define MIPS_EBASE_WG		(_ULCAST_(1) << 11)
+#define MIPS_EBASE_BASE_SHIFT	12
+#define MIPS_EBASE_BASE		(~_ULCAST_((1 << MIPS_EBASE_BASE_SHIFT) - 1))
+
 /* CMGCRBase bit definitions */
 #define MIPS_CMGCRB_BASE	11
 #define MIPS_CMGCRF_BASE	(~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1))
@@ -706,6 +746,94 @@
 #define MIPS_PWCTL_PSN_SHIFT	0
 #define MIPS_PWCTL_PSN_MASK	0x0000003f
 
+/* GuestCtl0 fields */
+#define MIPS_GCTL0_GM_SHIFT	31
+#define MIPS_GCTL0_GM		(_ULCAST_(1) << MIPS_GCTL0_GM_SHIFT)
+#define MIPS_GCTL0_RI_SHIFT	30
+#define MIPS_GCTL0_RI		(_ULCAST_(1) << MIPS_GCTL0_RI_SHIFT)
+#define MIPS_GCTL0_MC_SHIFT	29
+#define MIPS_GCTL0_MC		(_ULCAST_(1) << MIPS_GCTL0_MC_SHIFT)
+#define MIPS_GCTL0_CP0_SHIFT	28
+#define MIPS_GCTL0_CP0		(_ULCAST_(1) << MIPS_GCTL0_CP0_SHIFT)
+#define MIPS_GCTL0_AT_SHIFT	26
+#define MIPS_GCTL0_AT		(_ULCAST_(0x3) << MIPS_GCTL0_AT_SHIFT)
+#define MIPS_GCTL0_GT_SHIFT	25
+#define MIPS_GCTL0_GT		(_ULCAST_(1) << MIPS_GCTL0_GT_SHIFT)
+#define MIPS_GCTL0_CG_SHIFT	24
+#define MIPS_GCTL0_CG		(_ULCAST_(1) << MIPS_GCTL0_CG_SHIFT)
+#define MIPS_GCTL0_CF_SHIFT	23
+#define MIPS_GCTL0_CF		(_ULCAST_(1) << MIPS_GCTL0_CF_SHIFT)
+#define MIPS_GCTL0_G1_SHIFT	22
+#define MIPS_GCTL0_G1		(_ULCAST_(1) << MIPS_GCTL0_G1_SHIFT)
+#define MIPS_GCTL0_G0E_SHIFT	19
+#define MIPS_GCTL0_G0E		(_ULCAST_(1) << MIPS_GCTL0_G0E_SHIFT)
+#define MIPS_GCTL0_PT_SHIFT	18
+#define MIPS_GCTL0_PT		(_ULCAST_(1) << MIPS_GCTL0_PT_SHIFT)
+#define MIPS_GCTL0_RAD_SHIFT	9
+#define MIPS_GCTL0_RAD		(_ULCAST_(1) << MIPS_GCTL0_RAD_SHIFT)
+#define MIPS_GCTL0_DRG_SHIFT	8
+#define MIPS_GCTL0_DRG		(_ULCAST_(1) << MIPS_GCTL0_DRG_SHIFT)
+#define MIPS_GCTL0_G2_SHIFT	7
+#define MIPS_GCTL0_G2		(_ULCAST_(1) << MIPS_GCTL0_G2_SHIFT)
+#define MIPS_GCTL0_GEXC_SHIFT	2
+#define MIPS_GCTL0_GEXC		(_ULCAST_(0x1f) << MIPS_GCTL0_GEXC_SHIFT)
+#define MIPS_GCTL0_SFC2_SHIFT	1
+#define MIPS_GCTL0_SFC2		(_ULCAST_(1) << MIPS_GCTL0_SFC2_SHIFT)
+#define MIPS_GCTL0_SFC1_SHIFT	0
+#define MIPS_GCTL0_SFC1		(_ULCAST_(1) << MIPS_GCTL0_SFC1_SHIFT)
+
+/* GuestCtl0.AT Guest address translation control */
+#define MIPS_GCTL0_AT_ROOT	1  /* Guest MMU under Root control */
+#define MIPS_GCTL0_AT_GUEST	3  /* Guest MMU under Guest control */
+
+/* GuestCtl0.GExcCode Hypervisor exception cause codes */
+#define MIPS_GCTL0_GEXC_GPSI	0  /* Guest Privileged Sensitive Instruction */
+#define MIPS_GCTL0_GEXC_GSFC	1  /* Guest Software Field Change */
+#define MIPS_GCTL0_GEXC_HC	2  /* Hypercall */
+#define MIPS_GCTL0_GEXC_GRR	3  /* Guest Reserved Instruction Redirect */
+#define MIPS_GCTL0_GEXC_GVA	8  /* Guest Virtual Address available */
+#define MIPS_GCTL0_GEXC_GHFC	9  /* Guest Hardware Field Change */
+#define MIPS_GCTL0_GEXC_GPA	10 /* Guest Physical Address available */
+
+/* GuestCtl0Ext fields */
+#define MIPS_GCTL0EXT_RPW_SHIFT	8
+#define MIPS_GCTL0EXT_RPW	(_ULCAST_(0x3) << MIPS_GCTL0EXT_RPW_SHIFT)
+#define MIPS_GCTL0EXT_NCC_SHIFT	6
+#define MIPS_GCTL0EXT_NCC	(_ULCAST_(0x3) << MIPS_GCTL0EXT_NCC_SHIFT)
+#define MIPS_GCTL0EXT_CGI_SHIFT	4
+#define MIPS_GCTL0EXT_CGI	(_ULCAST_(1) << MIPS_GCTL0EXT_CGI_SHIFT)
+#define MIPS_GCTL0EXT_FCD_SHIFT	3
+#define MIPS_GCTL0EXT_FCD	(_ULCAST_(1) << MIPS_GCTL0EXT_FCD_SHIFT)
+#define MIPS_GCTL0EXT_OG_SHIFT	2
+#define MIPS_GCTL0EXT_OG	(_ULCAST_(1) << MIPS_GCTL0EXT_OG_SHIFT)
+#define MIPS_GCTL0EXT_BG_SHIFT	1
+#define MIPS_GCTL0EXT_BG	(_ULCAST_(1) << MIPS_GCTL0EXT_BG_SHIFT)
+#define MIPS_GCTL0EXT_MG_SHIFT	0
+#define MIPS_GCTL0EXT_MG	(_ULCAST_(1) << MIPS_GCTL0EXT_MG_SHIFT)
+
+/* GuestCtl0Ext.RPW Root page walk configuration */
+#define MIPS_GCTL0EXT_RPW_BOTH	0  /* Root PW for GPA->RPA and RVA->RPA */
+#define MIPS_GCTL0EXT_RPW_GPA	2  /* Root PW for GPA->RPA */
+#define MIPS_GCTL0EXT_RPW_RVA	3  /* Root PW for RVA->RPA */
+
+/* GuestCtl0Ext.NCC Nested cache coherency attributes */
+#define MIPS_GCTL0EXT_NCC_IND	0  /* Guest CCA independent of Root CCA */
+#define MIPS_GCTL0EXT_NCC_MOD	1  /* Guest CCA modified by Root CCA */
+
+/* GuestCtl1 fields */
+#define MIPS_GCTL1_ID_SHIFT	0
+#define MIPS_GCTL1_ID_WIDTH	8
+#define MIPS_GCTL1_ID		(_ULCAST_(0xff) << MIPS_GCTL1_ID_SHIFT)
+#define MIPS_GCTL1_RID_SHIFT	16
+#define MIPS_GCTL1_RID_WIDTH	8
+#define MIPS_GCTL1_RID		(_ULCAST_(0xff) << MIPS_GCTL1_RID_SHIFT)
+#define MIPS_GCTL1_EID_SHIFT	24
+#define MIPS_GCTL1_EID_WIDTH	8
+#define MIPS_GCTL1_EID		(_ULCAST_(0xff) << MIPS_GCTL1_EID_SHIFT)
+
+/* GuestID reserved for root context */
+#define MIPS_GCTL1_ROOT_GUESTID	0
+
 /* CDMMBase register bit definitions */
 #define MIPS_CDMMBASE_SIZE_SHIFT 0
 #define MIPS_CDMMBASE_SIZE	(_ULCAST_(511) << MIPS_CDMMBASE_SIZE_SHIFT)
@@ -757,6 +885,15 @@
 /* Disable Branch Return Cache */
 #define R10K_DIAG_D_BRC		(_ULCAST_(1) << 22)
 
+/* Flush ITLB */
+#define LOONGSON_DIAG_ITLB	(_ULCAST_(1) << 2)
+/* Flush DTLB */
+#define LOONGSON_DIAG_DTLB	(_ULCAST_(1) << 3)
+/* Flush VTLB */
+#define LOONGSON_DIAG_VTLB	(_ULCAST_(1) << 12)
+/* Flush FTLB */
+#define LOONGSON_DIAG_FTLB	(_ULCAST_(1) << 13)
+
 /*
  * Coprocessor 1 (FPU) register names
  */
@@ -1186,9 +1323,15 @@
 #define read_c0_context()	__read_ulong_c0_register($4, 0)
 #define write_c0_context(val)	__write_ulong_c0_register($4, 0, val)
 
+#define read_c0_contextconfig()		__read_32bit_c0_register($4, 1)
+#define write_c0_contextconfig(val)	__write_32bit_c0_register($4, 1, val)
+
 #define read_c0_userlocal()	__read_ulong_c0_register($4, 2)
 #define write_c0_userlocal(val) __write_ulong_c0_register($4, 2, val)
 
+#define read_c0_xcontextconfig()	__read_ulong_c0_register($4, 3)
+#define write_c0_xcontextconfig(val)	__write_ulong_c0_register($4, 3, val)
+
 #define read_c0_pagemask()	__read_32bit_c0_register($5, 0)
 #define write_c0_pagemask(val)	__write_32bit_c0_register($5, 0, val)
 
@@ -1206,6 +1349,9 @@
 #define read_c0_badvaddr()	__read_ulong_c0_register($8, 0)
 #define write_c0_badvaddr(val)	__write_ulong_c0_register($8, 0, val)
 
+#define read_c0_badinstr()	__read_32bit_c0_register($8, 1)
+#define read_c0_badinstrp()	__read_32bit_c0_register($8, 2)
+
 #define read_c0_count()		__read_32bit_c0_register($9, 0)
 #define write_c0_count(val)	__write_32bit_c0_register($9, 0, val)
 
@@ -1218,9 +1364,21 @@
 #define read_c0_entryhi()	__read_ulong_c0_register($10, 0)
 #define write_c0_entryhi(val)	__write_ulong_c0_register($10, 0, val)
 
+#define read_c0_guestctl1()	__read_32bit_c0_register($10, 4)
+#define write_c0_guestctl1(val)	__write_32bit_c0_register($10, 4, val)
+
+#define read_c0_guestctl2()	__read_32bit_c0_register($10, 5)
+#define write_c0_guestctl2(val)	__write_32bit_c0_register($10, 5, val)
+
+#define read_c0_guestctl3()	__read_32bit_c0_register($10, 6)
+#define write_c0_guestctl3(val)	__write_32bit_c0_register($10, 6, val)
+
 #define read_c0_compare()	__read_32bit_c0_register($11, 0)
 #define write_c0_compare(val)	__write_32bit_c0_register($11, 0, val)
 
+#define read_c0_guestctl0ext()	__read_32bit_c0_register($11, 4)
+#define write_c0_guestctl0ext(val) __write_32bit_c0_register($11, 4, val)
+
 #define read_c0_compare2()	__read_32bit_c0_register($11, 6) /* pnx8550 */
 #define write_c0_compare2(val)	__write_32bit_c0_register($11, 6, val)
 
@@ -1231,6 +1389,12 @@
 
 #define write_c0_status(val)	__write_32bit_c0_register($12, 0, val)
 
+#define read_c0_guestctl0()	__read_32bit_c0_register($12, 6)
+#define write_c0_guestctl0(val)	__write_32bit_c0_register($12, 6, val)
+
+#define read_c0_gtoffset()	__read_32bit_c0_register($12, 7)
+#define write_c0_gtoffset(val)	__write_32bit_c0_register($12, 7, val)
+
 #define read_c0_cause()		__read_32bit_c0_register($13, 0)
 #define write_c0_cause(val)	__write_32bit_c0_register($13, 0, val)
 
@@ -1416,6 +1580,9 @@
 #define read_c0_ebase()		__read_32bit_c0_register($15, 1)
 #define write_c0_ebase(val)	__write_32bit_c0_register($15, 1, val)
 
+#define read_c0_ebase_64()	__read_64bit_c0_register($15, 1)
+#define write_c0_ebase_64(val)	__write_64bit_c0_register($15, 1, val)
+
 #define read_c0_cdmmbase()	__read_ulong_c0_register($15, 2)
 #define write_c0_cdmmbase(val)	__write_ulong_c0_register($15, 2, val)
 
@@ -1442,6 +1609,12 @@
 #define read_c0_pwctl()		__read_32bit_c0_register($6, 6)
 #define write_c0_pwctl(val)	__write_32bit_c0_register($6, 6, val)
 
+#define read_c0_pgd()		__read_64bit_c0_register($9, 7)
+#define write_c0_pgd(val)	__write_64bit_c0_register($9, 7, val)
+
+#define read_c0_kpgd()		__read_64bit_c0_register($31, 7)
+#define write_c0_kpgd(val)	__write_64bit_c0_register($31, 7, val)
+
 /* Cavium OCTEON (cnMIPS) */
 #define read_c0_cvmcount()	__read_ulong_c0_register($9, 6)
 #define write_c0_cvmcount(val)	__write_ulong_c0_register($9, 6, val)
@@ -1507,6 +1680,317 @@
 #define write_c0_brcm_sleepcount(val)	__write_32bit_c0_register($22, 7, val)
 
 /*
+ * Macros to access the guest system control coprocessor
+ */
+
+#ifdef TOOLCHAIN_SUPPORTS_VIRT
+
+#define __read_32bit_gc0_register(source, sel)				\
+({ int __res;								\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tmips32r2\n\t"					\
+		".set\tvirt\n\t"					\
+		"mfgc0\t%0, $%1, %2\n\t"				\
+		".set\tpop"						\
+		: "=r" (__res)						\
+		: "i" (source), "i" (sel));				\
+	__res;								\
+})
+
+#define __read_64bit_gc0_register(source, sel)				\
+({ unsigned long long __res;						\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tmips64r2\n\t"					\
+		".set\tvirt\n\t"					\
+		"dmfgc0\t%0, $%1, %2\n\t"			\
+		".set\tpop"						\
+		: "=r" (__res)						\
+		: "i" (source), "i" (sel));				\
+	__res;								\
+})
+
+#define __write_32bit_gc0_register(register, sel, value)		\
+do {									\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tmips32r2\n\t"					\
+		".set\tvirt\n\t"					\
+		"mtgc0\t%z0, $%1, %2\n\t"				\
+		".set\tpop"						\
+		: : "Jr" ((unsigned int)(value)),			\
+		    "i" (register), "i" (sel));				\
+} while (0)
+
+#define __write_64bit_gc0_register(register, sel, value)		\
+do {									\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tmips64r2\n\t"					\
+		".set\tvirt\n\t"					\
+		"dmtgc0\t%z0, $%1, %2\n\t"				\
+		".set\tpop"						\
+		: : "Jr" (value),					\
+		    "i" (register), "i" (sel));				\
+} while (0)
+
+#else	/* TOOLCHAIN_SUPPORTS_VIRT */
+
+#define __read_32bit_gc0_register(source, sel)				\
+({ int __res;								\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tnoat\n\t"					\
+		"# mfgc0\t$1, $%1, %2\n\t"				\
+		".word\t(0x40610000 | %1 << 11 | %2)\n\t"		\
+		"move\t%0, $1\n\t"					\
+		".set\tpop"						\
+		: "=r" (__res)						\
+		: "i" (source), "i" (sel));				\
+	__res;								\
+})
+
+#define __read_64bit_gc0_register(source, sel)				\
+({ unsigned long long __res;						\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tnoat\n\t"					\
+		"# dmfgc0\t$1, $%1, %2\n\t"				\
+		".word\t(0x40610100 | %1 << 11 | %2)\n\t"		\
+		"move\t%0, $1\n\t"					\
+		".set\tpop"						\
+		: "=r" (__res)						\
+		: "i" (source), "i" (sel));				\
+	__res;								\
+})
+
+#define __write_32bit_gc0_register(register, sel, value)		\
+do {									\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tnoat\n\t"					\
+		"move\t$1, %0\n\t"					\
+		"# mtgc0\t$1, $%1, %2\n\t"				\
+		".word\t(0x40610200 | %1 << 11 | %2)\n\t"		\
+		".set\tpop"						\
+		: : "Jr" ((unsigned int)(value)),			\
+		    "i" (register), "i" (sel));				\
+} while (0)
+
+#define __write_64bit_gc0_register(register, sel, value)		\
+do {									\
+	__asm__ __volatile__(						\
+		".set\tpush\n\t"					\
+		".set\tnoat\n\t"					\
+		"move\t$1, %0\n\t"					\
+		"# dmtgc0\t$1, $%1, %2\n\t"				\
+		".word\t(0x40610300 | %1 << 11 | %2)\n\t"		\
+		".set\tpop"						\
+		: : "Jr" (value),					\
+		    "i" (register), "i" (sel));				\
+} while (0)
+
+#endif	/* !TOOLCHAIN_SUPPORTS_VIRT */
+
+#define __read_ulong_gc0_register(reg, sel)				\
+	((sizeof(unsigned long) == 4) ?					\
+	(unsigned long) __read_32bit_gc0_register(reg, sel) :		\
+	(unsigned long) __read_64bit_gc0_register(reg, sel))
+
+#define __write_ulong_gc0_register(reg, sel, val)			\
+do {									\
+	if (sizeof(unsigned long) == 4)					\
+		__write_32bit_gc0_register(reg, sel, val);		\
+	else								\
+		__write_64bit_gc0_register(reg, sel, val);		\
+} while (0)
+
+#define read_gc0_index()		__read_32bit_gc0_register(0, 0)
+#define write_gc0_index(val)		__write_32bit_gc0_register(0, 0, val)
+
+#define read_gc0_entrylo0()		__read_ulong_gc0_register(2, 0)
+#define write_gc0_entrylo0(val)		__write_ulong_gc0_register(2, 0, val)
+
+#define read_gc0_entrylo1()		__read_ulong_gc0_register(3, 0)
+#define write_gc0_entrylo1(val)		__write_ulong_gc0_register(3, 0, val)
+
+#define read_gc0_context()		__read_ulong_gc0_register(4, 0)
+#define write_gc0_context(val)		__write_ulong_gc0_register(4, 0, val)
+
+#define read_gc0_contextconfig()	__read_32bit_gc0_register(4, 1)
+#define write_gc0_contextconfig(val)	__write_32bit_gc0_register(4, 1, val)
+
+#define read_gc0_userlocal()		__read_ulong_gc0_register(4, 2)
+#define write_gc0_userlocal(val)	__write_ulong_gc0_register(4, 2, val)
+
+#define read_gc0_xcontextconfig()	__read_ulong_gc0_register(4, 3)
+#define write_gc0_xcontextconfig(val)	__write_ulong_gc0_register(4, 3, val)
+
+#define read_gc0_pagemask()		__read_32bit_gc0_register(5, 0)
+#define write_gc0_pagemask(val)		__write_32bit_gc0_register(5, 0, val)
+
+#define read_gc0_pagegrain()		__read_32bit_gc0_register(5, 1)
+#define write_gc0_pagegrain(val)	__write_32bit_gc0_register(5, 1, val)
+
+#define read_gc0_segctl0()		__read_ulong_gc0_register(5, 2)
+#define write_gc0_segctl0(val)		__write_ulong_gc0_register(5, 2, val)
+
+#define read_gc0_segctl1()		__read_ulong_gc0_register(5, 3)
+#define write_gc0_segctl1(val)		__write_ulong_gc0_register(5, 3, val)
+
+#define read_gc0_segctl2()		__read_ulong_gc0_register(5, 4)
+#define write_gc0_segctl2(val)		__write_ulong_gc0_register(5, 4, val)
+
+#define read_gc0_pwbase()		__read_ulong_gc0_register(5, 5)
+#define write_gc0_pwbase(val)		__write_ulong_gc0_register(5, 5, val)
+
+#define read_gc0_pwfield()		__read_ulong_gc0_register(5, 6)
+#define write_gc0_pwfield(val)		__write_ulong_gc0_register(5, 6, val)
+
+#define read_gc0_pwsize()		__read_ulong_gc0_register(5, 7)
+#define write_gc0_pwsize(val)		__write_ulong_gc0_register(5, 7, val)
+
+#define read_gc0_wired()		__read_32bit_gc0_register(6, 0)
+#define write_gc0_wired(val)		__write_32bit_gc0_register(6, 0, val)
+
+#define read_gc0_pwctl()		__read_32bit_gc0_register(6, 6)
+#define write_gc0_pwctl(val)		__write_32bit_gc0_register(6, 6, val)
+
+#define read_gc0_hwrena()		__read_32bit_gc0_register(7, 0)
+#define write_gc0_hwrena(val)		__write_32bit_gc0_register(7, 0, val)
+
+#define read_gc0_badvaddr()		__read_ulong_gc0_register(8, 0)
+#define write_gc0_badvaddr(val)		__write_ulong_gc0_register(8, 0, val)
+
+#define read_gc0_badinstr()		__read_32bit_gc0_register(8, 1)
+#define write_gc0_badinstr(val)		__write_32bit_gc0_register(8, 1, val)
+
+#define read_gc0_badinstrp()		__read_32bit_gc0_register(8, 2)
+#define write_gc0_badinstrp(val)	__write_32bit_gc0_register(8, 2, val)
+
+#define read_gc0_count()		__read_32bit_gc0_register(9, 0)
+
+#define read_gc0_entryhi()		__read_ulong_gc0_register(10, 0)
+#define write_gc0_entryhi(val)		__write_ulong_gc0_register(10, 0, val)
+
+#define read_gc0_compare()		__read_32bit_gc0_register(11, 0)
+#define write_gc0_compare(val)		__write_32bit_gc0_register(11, 0, val)
+
+#define read_gc0_status()		__read_32bit_gc0_register(12, 0)
+#define write_gc0_status(val)		__write_32bit_gc0_register(12, 0, val)
+
+#define read_gc0_intctl()		__read_32bit_gc0_register(12, 1)
+#define write_gc0_intctl(val)		__write_32bit_gc0_register(12, 1, val)
+
+#define read_gc0_cause()		__read_32bit_gc0_register(13, 0)
+#define write_gc0_cause(val)		__write_32bit_gc0_register(13, 0, val)
+
+#define read_gc0_epc()			__read_ulong_gc0_register(14, 0)
+#define write_gc0_epc(val)		__write_ulong_gc0_register(14, 0, val)
+
+#define read_gc0_ebase()		__read_32bit_gc0_register(15, 1)
+#define write_gc0_ebase(val)		__write_32bit_gc0_register(15, 1, val)
+
+#define read_gc0_ebase_64()		__read_64bit_gc0_register(15, 1)
+#define write_gc0_ebase_64(val)		__write_64bit_gc0_register(15, 1, val)
+
+#define read_gc0_config()		__read_32bit_gc0_register(16, 0)
+#define read_gc0_config1()		__read_32bit_gc0_register(16, 1)
+#define read_gc0_config2()		__read_32bit_gc0_register(16, 2)
+#define read_gc0_config3()		__read_32bit_gc0_register(16, 3)
+#define read_gc0_config4()		__read_32bit_gc0_register(16, 4)
+#define read_gc0_config5()		__read_32bit_gc0_register(16, 5)
+#define read_gc0_config6()		__read_32bit_gc0_register(16, 6)
+#define read_gc0_config7()		__read_32bit_gc0_register(16, 7)
+#define write_gc0_config(val)		__write_32bit_gc0_register(16, 0, val)
+#define write_gc0_config1(val)		__write_32bit_gc0_register(16, 1, val)
+#define write_gc0_config2(val)		__write_32bit_gc0_register(16, 2, val)
+#define write_gc0_config3(val)		__write_32bit_gc0_register(16, 3, val)
+#define write_gc0_config4(val)		__write_32bit_gc0_register(16, 4, val)
+#define write_gc0_config5(val)		__write_32bit_gc0_register(16, 5, val)
+#define write_gc0_config6(val)		__write_32bit_gc0_register(16, 6, val)
+#define write_gc0_config7(val)		__write_32bit_gc0_register(16, 7, val)
+
+#define read_gc0_watchlo0()		__read_ulong_gc0_register(18, 0)
+#define read_gc0_watchlo1()		__read_ulong_gc0_register(18, 1)
+#define read_gc0_watchlo2()		__read_ulong_gc0_register(18, 2)
+#define read_gc0_watchlo3()		__read_ulong_gc0_register(18, 3)
+#define read_gc0_watchlo4()		__read_ulong_gc0_register(18, 4)
+#define read_gc0_watchlo5()		__read_ulong_gc0_register(18, 5)
+#define read_gc0_watchlo6()		__read_ulong_gc0_register(18, 6)
+#define read_gc0_watchlo7()		__read_ulong_gc0_register(18, 7)
+#define write_gc0_watchlo0(val)		__write_ulong_gc0_register(18, 0, val)
+#define write_gc0_watchlo1(val)		__write_ulong_gc0_register(18, 1, val)
+#define write_gc0_watchlo2(val)		__write_ulong_gc0_register(18, 2, val)
+#define write_gc0_watchlo3(val)		__write_ulong_gc0_register(18, 3, val)
+#define write_gc0_watchlo4(val)		__write_ulong_gc0_register(18, 4, val)
+#define write_gc0_watchlo5(val)		__write_ulong_gc0_register(18, 5, val)
+#define write_gc0_watchlo6(val)		__write_ulong_gc0_register(18, 6, val)
+#define write_gc0_watchlo7(val)		__write_ulong_gc0_register(18, 7, val)
+
+#define read_gc0_watchhi0()		__read_32bit_gc0_register(19, 0)
+#define read_gc0_watchhi1()		__read_32bit_gc0_register(19, 1)
+#define read_gc0_watchhi2()		__read_32bit_gc0_register(19, 2)
+#define read_gc0_watchhi3()		__read_32bit_gc0_register(19, 3)
+#define read_gc0_watchhi4()		__read_32bit_gc0_register(19, 4)
+#define read_gc0_watchhi5()		__read_32bit_gc0_register(19, 5)
+#define read_gc0_watchhi6()		__read_32bit_gc0_register(19, 6)
+#define read_gc0_watchhi7()		__read_32bit_gc0_register(19, 7)
+#define write_gc0_watchhi0(val)		__write_32bit_gc0_register(19, 0, val)
+#define write_gc0_watchhi1(val)		__write_32bit_gc0_register(19, 1, val)
+#define write_gc0_watchhi2(val)		__write_32bit_gc0_register(19, 2, val)
+#define write_gc0_watchhi3(val)		__write_32bit_gc0_register(19, 3, val)
+#define write_gc0_watchhi4(val)		__write_32bit_gc0_register(19, 4, val)
+#define write_gc0_watchhi5(val)		__write_32bit_gc0_register(19, 5, val)
+#define write_gc0_watchhi6(val)		__write_32bit_gc0_register(19, 6, val)
+#define write_gc0_watchhi7(val)		__write_32bit_gc0_register(19, 7, val)
+
+#define read_gc0_xcontext()		__read_ulong_gc0_register(20, 0)
+#define write_gc0_xcontext(val)		__write_ulong_gc0_register(20, 0, val)
+
+#define read_gc0_perfctrl0()		__read_32bit_gc0_register(25, 0)
+#define write_gc0_perfctrl0(val)	__write_32bit_gc0_register(25, 0, val)
+#define read_gc0_perfcntr0()		__read_32bit_gc0_register(25, 1)
+#define write_gc0_perfcntr0(val)	__write_32bit_gc0_register(25, 1, val)
+#define read_gc0_perfcntr0_64()		__read_64bit_gc0_register(25, 1)
+#define write_gc0_perfcntr0_64(val)	__write_64bit_gc0_register(25, 1, val)
+#define read_gc0_perfctrl1()		__read_32bit_gc0_register(25, 2)
+#define write_gc0_perfctrl1(val)	__write_32bit_gc0_register(25, 2, val)
+#define read_gc0_perfcntr1()		__read_32bit_gc0_register(25, 3)
+#define write_gc0_perfcntr1(val)	__write_32bit_gc0_register(25, 3, val)
+#define read_gc0_perfcntr1_64()		__read_64bit_gc0_register(25, 3)
+#define write_gc0_perfcntr1_64(val)	__write_64bit_gc0_register(25, 3, val)
+#define read_gc0_perfctrl2()		__read_32bit_gc0_register(25, 4)
+#define write_gc0_perfctrl2(val)	__write_32bit_gc0_register(25, 4, val)
+#define read_gc0_perfcntr2()		__read_32bit_gc0_register(25, 5)
+#define write_gc0_perfcntr2(val)	__write_32bit_gc0_register(25, 5, val)
+#define read_gc0_perfcntr2_64()		__read_64bit_gc0_register(25, 5)
+#define write_gc0_perfcntr2_64(val)	__write_64bit_gc0_register(25, 5, val)
+#define read_gc0_perfctrl3()		__read_32bit_gc0_register(25, 6)
+#define write_gc0_perfctrl3(val)	__write_32bit_gc0_register(25, 6, val)
+#define read_gc0_perfcntr3()		__read_32bit_gc0_register(25, 7)
+#define write_gc0_perfcntr3(val)	__write_32bit_gc0_register(25, 7, val)
+#define read_gc0_perfcntr3_64()		__read_64bit_gc0_register(25, 7)
+#define write_gc0_perfcntr3_64(val)	__write_64bit_gc0_register(25, 7, val)
+
+#define read_gc0_errorepc()		__read_ulong_gc0_register(30, 0)
+#define write_gc0_errorepc(val)		__write_ulong_gc0_register(30, 0, val)
+
+#define read_gc0_kscratch1()		__read_ulong_gc0_register(31, 2)
+#define read_gc0_kscratch2()		__read_ulong_gc0_register(31, 3)
+#define read_gc0_kscratch3()		__read_ulong_gc0_register(31, 4)
+#define read_gc0_kscratch4()		__read_ulong_gc0_register(31, 5)
+#define read_gc0_kscratch5()		__read_ulong_gc0_register(31, 6)
+#define read_gc0_kscratch6()		__read_ulong_gc0_register(31, 7)
+#define write_gc0_kscratch1(val)	__write_ulong_gc0_register(31, 2, val)
+#define write_gc0_kscratch2(val)	__write_ulong_gc0_register(31, 3, val)
+#define write_gc0_kscratch3(val)	__write_ulong_gc0_register(31, 4, val)
+#define write_gc0_kscratch4(val)	__write_ulong_gc0_register(31, 5, val)
+#define write_gc0_kscratch5(val)	__write_ulong_gc0_register(31, 6, val)
+#define write_gc0_kscratch6(val)	__write_ulong_gc0_register(31, 7, val)
+
+/*
  * Macros to access the floating point coprocessor control registers
  */
 #define _read_32bit_cp1_register(source, gas_hardfloat)			\
@@ -2001,47 +2485,159 @@
 		".set reorder");
 }
 
+#ifdef TOOLCHAIN_SUPPORTS_VIRT
+
 /*
- * Manipulate bits in a c0 register.
+ * Guest TLB operations.
+ *
+ * It is the responsibility of the caller to take care of any TLB hazards.
  */
-#define __BUILD_SET_C0(name)					\
+static inline void guest_tlb_probe(void)
+{
+	__asm__ __volatile__(
+		".set push\n\t"
+		".set noreorder\n\t"
+		".set virt\n\t"
+		"tlbgp\n\t"
+		".set pop");
+}
+
+static inline void guest_tlb_read(void)
+{
+	__asm__ __volatile__(
+		".set push\n\t"
+		".set noreorder\n\t"
+		".set virt\n\t"
+		"tlbgr\n\t"
+		".set pop");
+}
+
+static inline void guest_tlb_write_indexed(void)
+{
+	__asm__ __volatile__(
+		".set push\n\t"
+		".set noreorder\n\t"
+		".set virt\n\t"
+		"tlbgwi\n\t"
+		".set pop");
+}
+
+static inline void guest_tlb_write_random(void)
+{
+	__asm__ __volatile__(
+		".set push\n\t"
+		".set noreorder\n\t"
+		".set virt\n\t"
+		"tlbgwr\n\t"
+		".set pop");
+}
+
+/*
+ * Guest TLB Invalidate Flush
+ */
+static inline void guest_tlbinvf(void)
+{
+	__asm__ __volatile__(
+		".set push\n\t"
+		".set noreorder\n\t"
+		".set virt\n\t"
+		"tlbginvf\n\t"
+		".set pop");
+}
+
+#else	/* TOOLCHAIN_SUPPORTS_VIRT */
+
+/*
+ * Guest TLB operations.
+ *
+ * It is the responsibility of the caller to take care of any TLB hazards.
+ */
+static inline void guest_tlb_probe(void)
+{
+	__asm__ __volatile__(
+		"# tlbgp\n\t"
+		".word 0x42000010");
+}
+
+static inline void guest_tlb_read(void)
+{
+	__asm__ __volatile__(
+		"# tlbgr\n\t"
+		".word 0x42000009");
+}
+
+static inline void guest_tlb_write_indexed(void)
+{
+	__asm__ __volatile__(
+		"# tlbgwi\n\t"
+		".word 0x4200000a");
+}
+
+static inline void guest_tlb_write_random(void)
+{
+	__asm__ __volatile__(
+		"# tlbgwr\n\t"
+		".word 0x4200000e");
+}
+
+/*
+ * Guest TLB Invalidate Flush
+ */
+static inline void guest_tlbinvf(void)
+{
+	__asm__ __volatile__(
+		"# tlbginvf\n\t"
+		".word 0x4200000c");
+}
+
+#endif	/* !TOOLCHAIN_SUPPORTS_VIRT */
+
+/*
+ * Manipulate bits in a register.
+ */
+#define __BUILD_SET_COMMON(name)				\
 static inline unsigned int					\
-set_c0_##name(unsigned int set)					\
+set_##name(unsigned int set)					\
 {								\
 	unsigned int res, new;					\
 								\
-	res = read_c0_##name();					\
+	res = read_##name();					\
 	new = res | set;					\
-	write_c0_##name(new);					\
+	write_##name(new);					\
 								\
 	return res;						\
 }								\
 								\
 static inline unsigned int					\
-clear_c0_##name(unsigned int clear)				\
+clear_##name(unsigned int clear)				\
 {								\
 	unsigned int res, new;					\
 								\
-	res = read_c0_##name();					\
+	res = read_##name();					\
 	new = res & ~clear;					\
-	write_c0_##name(new);					\
+	write_##name(new);					\
 								\
 	return res;						\
 }								\
 								\
 static inline unsigned int					\
-change_c0_##name(unsigned int change, unsigned int val)		\
+change_##name(unsigned int change, unsigned int val)		\
 {								\
 	unsigned int res, new;					\
 								\
-	res = read_c0_##name();					\
+	res = read_##name();					\
 	new = res & ~change;					\
 	new |= (val & change);					\
-	write_c0_##name(new);					\
+	write_##name(new);					\
 								\
 	return res;						\
 }
 
+/*
+ * Manipulate bits in a c0 register.
+ */
+#define __BUILD_SET_C0(name)	__BUILD_SET_COMMON(c0_##name)
+
 __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
@@ -2050,6 +2646,11 @@
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)
 __BUILD_SET_C0(pagegrain)
+__BUILD_SET_C0(guestctl0)
+__BUILD_SET_C0(guestctl0ext)
+__BUILD_SET_C0(guestctl1)
+__BUILD_SET_C0(guestctl2)
+__BUILD_SET_C0(guestctl3)
 __BUILD_SET_C0(brcm_config_0)
 __BUILD_SET_C0(brcm_bus_pll)
 __BUILD_SET_C0(brcm_reset)
@@ -2059,12 +2660,21 @@
 __BUILD_SET_C0(brcm_mode)
 
 /*
+ * Manipulate bits in a guest c0 register.
+ */
+#define __BUILD_SET_GC0(name)	__BUILD_SET_COMMON(gc0_##name)
+
+__BUILD_SET_GC0(status)
+__BUILD_SET_GC0(cause)
+__BUILD_SET_GC0(ebase)
+
+/*
  * Return low 10 bits of ebase.
  * Note that under KVM (MIPSVZ) this returns vcpu id.
  */
 static inline unsigned int get_ebase_cpunum(void)
 {
-	return read_c0_ebase() & 0x3ff;
+	return read_c0_ebase() & MIPS_EBASE_CPUNUM;
 }
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 45914b5..fc57e13 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -65,37 +65,32 @@
 	back_to_back_c0_hazard();					\
 	TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir)
 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT*/
-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
-
-#define ASID_INC	0x40
-#define ASID_MASK	0xfc0
-
-#elif defined(CONFIG_CPU_R8000)
-
-#define ASID_INC	0x10
-#define ASID_MASK	0xff0
-
-#else /* FIXME: not correct for R6000 */
-
-#define ASID_INC	0x1
-#define ASID_MASK	0xff
-
-#endif
-
-#define cpu_context(cpu, mm)	((mm)->context.asid[cpu])
-#define cpu_asid(cpu, mm)	(cpu_context((cpu), (mm)) & ASID_MASK)
-#define asid_cache(cpu)		(cpu_data[cpu].asid_cache)
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
 
 /*
  *  All unused by hardware upper bits will be considered
  *  as a software asid extension.
  */
-#define ASID_VERSION_MASK  ((unsigned long)~(ASID_MASK|(ASID_MASK-1)))
-#define ASID_FIRST_VERSION ((unsigned long)(~ASID_VERSION_MASK) + 1)
+static inline unsigned long asid_version_mask(unsigned int cpu)
+{
+	unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
+
+	return ~(asid_mask | (asid_mask - 1));	/* unused upper bits */
+}
+
+static inline unsigned long asid_first_version(unsigned int cpu)
+{
+	return ~asid_version_mask(cpu) + 1;	/* one past the last version-0 value */
+}
+
+#define cpu_context(cpu, mm)	((mm)->context.asid[cpu])
+#define asid_cache(cpu)		(cpu_data[cpu].asid_cache)
+#define cpu_asid(cpu, mm) \
+	(cpu_context((cpu), (mm)) & cpu_asid_mask(&cpu_data[cpu]))
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
 
 /* Normal, classic MIPS get_new_mmu_context */
 static inline void
@@ -104,7 +99,7 @@
 	extern void kvm_local_flush_tlb_all(void);
 	unsigned long asid = asid_cache(cpu);
 
-	if (! ((asid += ASID_INC) & ASID_MASK) ) {
+	if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
 		if (cpu_has_vtag_icache)
 			flush_icache_all();
 #ifdef CONFIG_KVM
@@ -113,7 +108,7 @@
 		local_flush_tlb_all();	/* start new asid cycle */
 #endif
 		if (!asid)		/* fix version if needed */
-			asid = ASID_FIRST_VERSION;
+			asid = asid_first_version(cpu);
 	}
 
 	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
@@ -145,7 +140,7 @@
 
 	htw_stop();
 	/* Check if our ASID is of an older version and thus invalid */
-	if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & ASID_VERSION_MASK)
+	if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & asid_version_mask(cpu))
 		get_new_mmu_context(next, cpu);
 	write_c0_entryhi(cpu_asid(cpu, next));
 	TLBMISS_HANDLER_SETUP_PGD(next->pgd);
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
index bbb85fe..6e4effa 100644
--- a/arch/mips/include/asm/msa.h
+++ b/arch/mips/include/asm/msa.h
@@ -147,6 +147,19 @@
 		_restore_msa(t);
 }
 
+static inline void init_msa_upper(void)
+{
+	/*
+	 * Invoke _init_msa_upper() unless MSA is known at build time to be
+	 * absent.
+	 *
+	 * cpu_has_msa is only consulted when it is a compile-time constant:
+	 * the compiler can then discard this call on CPUs without MSA, and
+	 * CPUs with MSA do not pay for a redundant runtime check.
+	 */
+	if (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa)
+		_init_msa_upper();
+}
+
 #ifdef TOOLCHAIN_SUPPORTS_MSA
 
 #define __BUILD_MSA_CTL_REG(name, cs)				\
diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
index d92cf59..6278776 100644
--- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h
+++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
@@ -32,6 +32,8 @@
 #ifndef __CVMX_BOOTINFO_H__
 #define __CVMX_BOOTINFO_H__
 
+#include "cvmx-coremask.h"
+
 /*
  * Current major and minor versions of the CVMX bootinfo block that is
  * passed from the bootloader to the application.  This is versioned
@@ -39,7 +41,7 @@
  * versions.
  */
 #define CVMX_BOOTINFO_MAJ_VER 1
-#define CVMX_BOOTINFO_MIN_VER 3
+#define CVMX_BOOTINFO_MIN_VER 4
 
 #if (CVMX_BOOTINFO_MAJ_VER == 1)
 #define CVMX_BOOTINFO_OCTEON_SERIAL_LEN 20
@@ -124,6 +126,13 @@
 	 */
 	uint64_t fdt_addr;
 #endif
+#if (CVMX_BOOTINFO_MIN_VER >= 4)
+	/*
+	 * Coremask used for processors with more than 32 cores
+	 * or with OCI.  This replaces core_mask.
+	 */
+	struct cvmx_coremask ext_core_mask;
+#endif
 #else				/* __BIG_ENDIAN */
 	/*
 	 * Little-Endian: When the CPU mode is switched to
@@ -177,6 +186,9 @@
 #if (CVMX_BOOTINFO_MIN_VER >= 3)
 	uint64_t fdt_addr;
 #endif
+#if (CVMX_BOOTINFO_MIN_VER >= 4)
+	struct cvmx_coremask ext_core_mask;
+#endif
 #endif
 };
 
@@ -388,7 +400,7 @@
 		ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KONTRON_S1901)
 		ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MAX)
 	}
-	return "Unsupported Board";
+	return NULL;
 }
 
 #define ENUM_CHIP_TYPE_CASE(x) \
diff --git a/arch/mips/include/asm/octeon/cvmx-ciu3-defs.h b/arch/mips/include/asm/octeon/cvmx-ciu3-defs.h
new file mode 100644
index 0000000..547f778
--- /dev/null
+++ b/arch/mips/include/asm/octeon/cvmx-ciu3-defs.h
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2003-2016 Cavium Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ */
+
+#ifndef __CVMX_CIU3_DEFS_H__
+#define __CVMX_CIU3_DEFS_H__
+
+#define CVMX_CIU3_FUSE CVMX_ADD_IO_SEG(0x00010100000001A0ull)
+#define CVMX_CIU3_BIST CVMX_ADD_IO_SEG(0x00010100000001C0ull)
+#define CVMX_CIU3_CONST CVMX_ADD_IO_SEG(0x0001010000000220ull)
+#define CVMX_CIU3_CTL CVMX_ADD_IO_SEG(0x00010100000000E0ull)
+#define CVMX_CIU3_DESTX_IO_INT(offset) (CVMX_ADD_IO_SEG(0x0001010000210000ull) + ((offset) & 7) * 8)
+#define CVMX_CIU3_DESTX_PP_INT(offset) (CVMX_ADD_IO_SEG(0x0001010000200000ull) + ((offset) & 255) * 8)
+#define CVMX_CIU3_GSTOP CVMX_ADD_IO_SEG(0x0001010000000140ull)
+#define CVMX_CIU3_IDTX_CTL(offset) (CVMX_ADD_IO_SEG(0x0001010000110000ull) + ((offset) & 255) * 8)
+#define CVMX_CIU3_IDTX_IO(offset) (CVMX_ADD_IO_SEG(0x0001010000130000ull) + ((offset) & 255) * 8)
+#define CVMX_CIU3_IDTX_PPX(offset, block_id) (CVMX_ADD_IO_SEG(0x0001010000120000ull) + ((block_id) & 255) * 0x20ull)
+#define CVMX_CIU3_INTR_RAM_ECC_CTL CVMX_ADD_IO_SEG(0x0001010000000260ull)
+#define CVMX_CIU3_INTR_RAM_ECC_ST CVMX_ADD_IO_SEG(0x0001010000000280ull)
+#define CVMX_CIU3_INTR_READY CVMX_ADD_IO_SEG(0x00010100000002A0ull)
+#define CVMX_CIU3_INTR_SLOWDOWN CVMX_ADD_IO_SEG(0x0001010000000240ull)
+#define CVMX_CIU3_ISCX_CTL(offset) (CVMX_ADD_IO_SEG(0x0001010080000000ull) + ((offset) & 1048575) * 8)
+#define CVMX_CIU3_ISCX_W1C(offset) (CVMX_ADD_IO_SEG(0x0001010090000000ull) + ((offset) & 1048575) * 8)
+#define CVMX_CIU3_ISCX_W1S(offset) (CVMX_ADD_IO_SEG(0x00010100A0000000ull) + ((offset) & 1048575) * 8)
+#define CVMX_CIU3_NMI CVMX_ADD_IO_SEG(0x0001010000000160ull)
+#define CVMX_CIU3_SISCX(offset) (CVMX_ADD_IO_SEG(0x0001010000220000ull) + ((offset) & 255) * 8)
+#define CVMX_CIU3_TIMX(offset) (CVMX_ADD_IO_SEG(0x0001010000010000ull) + ((offset) & 15) * 8)
+
+union cvmx_ciu3_bist {
+	uint64_t u64;
+	struct cvmx_ciu3_bist_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_9_63                : 55;
+	uint64_t bist                         : 9;
+#else
+	uint64_t bist                         : 9;
+	uint64_t reserved_9_63                : 55;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_const {
+	uint64_t u64;
+	struct cvmx_ciu3_const_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t dests_io                     : 16;
+	uint64_t pintsn                       : 16;
+	uint64_t dests_pp                     : 16;
+	uint64_t idt                          : 16;
+#else
+	uint64_t idt                          : 16;
+	uint64_t dests_pp                     : 16;
+	uint64_t pintsn                       : 16;
+	uint64_t dests_io                     : 16;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_ctl {
+	uint64_t u64;
+	struct cvmx_ciu3_ctl_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_5_63                : 59;
+	uint64_t mcd_sel                      : 2;
+	uint64_t iscmem_le                    : 1;
+	uint64_t seq_dis                      : 1;
+	uint64_t cclk_dis                     : 1;
+#else
+	uint64_t cclk_dis                     : 1;
+	uint64_t seq_dis                      : 1;
+	uint64_t iscmem_le                    : 1;
+	uint64_t mcd_sel                      : 2;
+	uint64_t reserved_5_63                : 59;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_destx_io_int {
+	uint64_t u64;
+	struct cvmx_ciu3_destx_io_int_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_52_63               : 12;
+	uint64_t intsn                        : 20;
+	uint64_t reserved_10_31               : 22;
+	uint64_t intidt                       : 8;
+	uint64_t newint                       : 1;
+	uint64_t intr                         : 1;
+#else
+	uint64_t intr                         : 1;
+	uint64_t newint                       : 1;
+	uint64_t intidt                       : 8;
+	uint64_t reserved_10_31               : 22;
+	uint64_t intsn                        : 20;
+	uint64_t reserved_52_63               : 12;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_destx_pp_int {
+	uint64_t u64;
+	struct cvmx_ciu3_destx_pp_int_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_52_63               : 12;
+	uint64_t intsn                        : 20;
+	uint64_t reserved_10_31               : 22;
+	uint64_t intidt                       : 8;
+	uint64_t newint                       : 1;
+	uint64_t intr                         : 1;
+#else
+	uint64_t intr                         : 1;
+	uint64_t newint                       : 1;
+	uint64_t intidt                       : 8;
+	uint64_t reserved_10_31               : 22;
+	uint64_t intsn                        : 20;
+	uint64_t reserved_52_63               : 12;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_gstop {
+	uint64_t u64;
+	struct cvmx_ciu3_gstop_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_1_63                : 63;
+	uint64_t gstop                        : 1;
+#else
+	uint64_t gstop                        : 1;
+	uint64_t reserved_1_63                : 63;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_idtx_ctl {
+	uint64_t u64;
+	struct cvmx_ciu3_idtx_ctl_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_52_63               : 12;
+	uint64_t intsn                        : 20;
+	uint64_t reserved_4_31                : 28;
+	uint64_t intr                         : 1;
+	uint64_t newint                       : 1;
+	uint64_t ip_num                       : 2;
+#else
+	uint64_t ip_num                       : 2;
+	uint64_t newint                       : 1;
+	uint64_t intr                         : 1;
+	uint64_t reserved_4_31                : 28;
+	uint64_t intsn                        : 20;
+	uint64_t reserved_52_63               : 12;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_idtx_io {
+	uint64_t u64;
+	struct cvmx_ciu3_idtx_io_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_5_63                : 59;
+	uint64_t io                           : 5;
+#else
+	uint64_t io                           : 5;
+	uint64_t reserved_5_63                : 59;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_idtx_ppx {
+	uint64_t u64;
+	struct cvmx_ciu3_idtx_ppx_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_48_63               : 16;
+	uint64_t pp                           : 48;
+#else
+	uint64_t pp                           : 48;
+	uint64_t reserved_48_63               : 16;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_intr_ram_ecc_ctl {
+	uint64_t u64;
+	struct cvmx_ciu3_intr_ram_ecc_ctl_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_3_63                : 61;
+	uint64_t flip_synd                    : 2;
+	uint64_t ecc_ena                      : 1;
+#else
+	uint64_t ecc_ena                      : 1;
+	uint64_t flip_synd                    : 2;
+	uint64_t reserved_3_63                : 61;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_intr_ram_ecc_st {
+	uint64_t u64;
+	struct cvmx_ciu3_intr_ram_ecc_st_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_52_63               : 12;
+	uint64_t addr                         : 20;
+	uint64_t reserved_6_31                : 26;
+	uint64_t sisc_dbe                     : 1;
+	uint64_t sisc_sbe                     : 1;
+	uint64_t idt_dbe                      : 1;
+	uint64_t idt_sbe                      : 1;
+	uint64_t isc_dbe                      : 1;
+	uint64_t isc_sbe                      : 1;
+#else
+	uint64_t isc_sbe                      : 1;
+	uint64_t isc_dbe                      : 1;
+	uint64_t idt_sbe                      : 1;
+	uint64_t idt_dbe                      : 1;
+	uint64_t sisc_sbe                     : 1;
+	uint64_t sisc_dbe                     : 1;
+	uint64_t reserved_6_31                : 26;
+	uint64_t addr                         : 20;
+	uint64_t reserved_52_63               : 12;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_intr_ready {
+	uint64_t u64;
+	struct cvmx_ciu3_intr_ready_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_46_63               : 18;
+	uint64_t index                        : 14;
+	uint64_t reserved_1_31                : 31;
+	uint64_t ready                        : 1;
+#else
+	uint64_t ready                        : 1;
+	uint64_t reserved_1_31                : 31;
+	uint64_t index                        : 14;
+	uint64_t reserved_46_63               : 18;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_intr_slowdown {
+	uint64_t u64;
+	struct cvmx_ciu3_intr_slowdown_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_3_63                : 61;
+	uint64_t ctl                          : 3;
+#else
+	uint64_t ctl                          : 3;
+	uint64_t reserved_3_63                : 61;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_iscx_ctl {
+	uint64_t u64;
+	struct cvmx_ciu3_iscx_ctl_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_24_63               : 40;
+	uint64_t idt                          : 8;
+	uint64_t imp                          : 1;
+	uint64_t reserved_2_14                : 13;
+	uint64_t en                           : 1;
+	uint64_t raw                          : 1;
+#else
+	uint64_t raw                          : 1;
+	uint64_t en                           : 1;
+	uint64_t reserved_2_14                : 13;
+	uint64_t imp                          : 1;
+	uint64_t idt                          : 8;
+	uint64_t reserved_24_63               : 40;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_iscx_w1c {
+	uint64_t u64;
+	struct cvmx_ciu3_iscx_w1c_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_2_63                : 62;
+	uint64_t en                           : 1;
+	uint64_t raw                          : 1;
+#else
+	uint64_t raw                          : 1;
+	uint64_t en                           : 1;
+	uint64_t reserved_2_63                : 62;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_iscx_w1s {
+	uint64_t u64;
+	struct cvmx_ciu3_iscx_w1s_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_2_63                : 62;
+	uint64_t en                           : 1;
+	uint64_t raw                          : 1;
+#else
+	uint64_t raw                          : 1;
+	uint64_t en                           : 1;
+	uint64_t reserved_2_63                : 62;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_nmi {
+	uint64_t u64;
+	struct cvmx_ciu3_nmi_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_48_63               : 16;
+	uint64_t nmi                          : 48;
+#else
+	uint64_t nmi                          : 48;
+	uint64_t reserved_48_63               : 16;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_siscx {
+	uint64_t u64;
+	struct cvmx_ciu3_siscx_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t en                           : 64;
+#else
+	uint64_t en                           : 64;
+#endif
+	} s;
+};
+
+union cvmx_ciu3_timx {
+	uint64_t u64;
+	struct cvmx_ciu3_timx_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+	uint64_t reserved_37_63               : 27;
+	uint64_t one_shot                     : 1;
+	uint64_t len                          : 36;
+#else
+	uint64_t len                          : 36;
+	uint64_t one_shot                     : 1;
+	uint64_t reserved_37_63               : 27;
+#endif
+	} s;
+};
+
+#endif
diff --git a/arch/mips/include/asm/octeon/cvmx-coremask.h b/arch/mips/include/asm/octeon/cvmx-coremask.h
new file mode 100644
index 0000000..097dc09
--- /dev/null
+++ b/arch/mips/include/asm/octeon/cvmx-coremask.h
@@ -0,0 +1,89 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2016  Cavium Inc. (support@cavium.com).
+ *
+ */
+
+/*
+ * Module to support operations on bitmap of cores. Coremask can be used to
+ * select a specific core, a group of cores, or all available cores, for
+ * initialization and differentiation of roles within a single shared binary
+ * executable image.
+ *
+ * The core numbers used in this file are the same value as what is found in
+ * the COP0_EBASE register and the rdhwr 0 instruction.
+ *
+ * For the CN78XX and other multi-node environments the core numbers are not
+ * contiguous.  The core numbers for the CN78XX are as follows:
+ *
+ * Node 0:	Cores 0 - 47
+ * Node 1:	Cores 128 - 175
+ * Node 2:	Cores 256 - 303
+ * Node 3:	Cores 384 - 431
+ *
+ */
+
+#ifndef __CVMX_COREMASK_H__
+#define __CVMX_COREMASK_H__
+
+#define CVMX_MIPS_MAX_CORES 1024
+/* bits per bitmap element (one u64 word) */
+#define CVMX_COREMASK_ELTSZ 64
+
+/* number of u64 words needed to hold CVMX_MIPS_MAX_CORES bits */
+#define CVMX_COREMASK_BMPSZ (CVMX_MIPS_MAX_CORES / CVMX_COREMASK_ELTSZ)
+
+
+/* Core bitmap: bit (core % 64) of word (core / 64) stands for ``core''. */
+struct cvmx_coremask {
+	u64 coremask_bitmap[CVMX_COREMASK_BMPSZ];
+};
+
+/*
+ * Return true if bit ``core'' is set in *pcm. core is not range-checked.
+ */
+static inline bool cvmx_coremask_is_core_set(const struct cvmx_coremask *pcm,
+					    int core)
+{
+	int n, i;
+
+	n = core % CVMX_COREMASK_ELTSZ;	/* bit position inside the word */
+	i = core / CVMX_COREMASK_ELTSZ;	/* index of the word in the bitmap */
+
+	return (pcm->coremask_bitmap[i] & ((u64)1 << n)) != 0;
+}
+
+/*
+ * Copy the entire coremask *src into *dest (all bitmap words).
+ */
+static inline void cvmx_coremask_copy(struct cvmx_coremask *dest,
+				      const struct cvmx_coremask *src)
+{
+	memcpy(dest, src, sizeof(*dest));
+}
+
+/*
+ * Overwrite bitmap word 0 (cores 0-63) with coremask_64; other words are kept.
+ */
+static inline void cvmx_coremask_set64(struct cvmx_coremask *pcm,
+				       uint64_t coremask_64)
+{
+	pcm->coremask_bitmap[0] = coremask_64;
+}
+
+/*
+ * Clear bit ``core'' in *pcm; other bits are kept. core is not range-checked.
+ */
+static inline void cvmx_coremask_clear_core(struct cvmx_coremask *pcm, int core)
+{
+	int n, i;
+
+	n = core % CVMX_COREMASK_ELTSZ;	/* bit position inside the word */
+	i = core / CVMX_COREMASK_ELTSZ;	/* index of the word in the bitmap */
+	pcm->coremask_bitmap[i] &= ~(1ull << n);
+}
+
+#endif /* __CVMX_COREMASK_H__ */
diff --git a/arch/mips/include/asm/octeon/cvmx-fpa-defs.h b/arch/mips/include/asm/octeon/cvmx-fpa-defs.h
index 1d79e3c..887ff8e 100644
--- a/arch/mips/include/asm/octeon/cvmx-fpa-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-fpa-defs.h
@@ -66,6 +66,7 @@
 #define CVMX_FPA_WART_CTL (CVMX_ADD_IO_SEG(0x00011800280000D8ull))
 #define CVMX_FPA_WART_STATUS (CVMX_ADD_IO_SEG(0x00011800280000E0ull))
 #define CVMX_FPA_WQE_THRESHOLD (CVMX_ADD_IO_SEG(0x0001180028000468ull))
+#define CVMX_FPA_CLK_COUNT (CVMX_ADD_IO_SEG(0x00012800000000F0ull))
 
 union cvmx_fpa_addr_range_error {
 	uint64_t u64;
diff --git a/arch/mips/include/asm/octeon/cvmx-mio-defs.h b/arch/mips/include/asm/octeon/cvmx-mio-defs.h
index bb0ae33..5196c04 100644
--- a/arch/mips/include/asm/octeon/cvmx-mio-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-mio-defs.h
@@ -1481,7 +1481,9 @@
 	uint64_t u64;
 	struct cvmx_mio_fus_dat2_s {
 #ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_48_63:16;
+		uint64_t reserved_59_63:5;
+		uint64_t run_platform:3;
+		uint64_t gbl_pwr_throttle:8;
 		uint64_t fus118:1;
 		uint64_t rom_info:10;
 		uint64_t power_limit:2;
@@ -1513,7 +1515,9 @@
 		uint64_t power_limit:2;
 		uint64_t rom_info:10;
 		uint64_t fus118:1;
-		uint64_t reserved_48_63:16;
+		uint64_t gbl_pwr_throttle:8;
+		uint64_t run_platform:3;
+		uint64_t reserved_59_63:5;
 #endif
 	} s;
 	struct cvmx_mio_fus_dat2_cn30xx {
@@ -1837,50 +1841,192 @@
 #endif
 	} cn68xx;
 	struct cvmx_mio_fus_dat2_cn68xx cn68xxp1;
+	struct cvmx_mio_fus_dat2_cn70xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_48_63:16;
+		uint64_t fus118:1;
+		uint64_t rom_info:10;
+		uint64_t power_limit:2;
+		uint64_t dorm_crypto:1;
+		uint64_t fus318:1;
+		uint64_t raid_en:1;
+		uint64_t reserved_31_29:3;
+		uint64_t nodfa_cp2:1;
+		uint64_t nomul:1;
+		uint64_t nocrypto:1;
+		uint64_t reserved_25_24:2;
+		uint64_t chip_id:8;
+		uint64_t reserved_15_0:16;
+#else
+		uint64_t reserved_15_0:16;
+		uint64_t chip_id:8;
+		uint64_t reserved_25_24:2;
+		uint64_t nocrypto:1;
+		uint64_t nomul:1;
+		uint64_t nodfa_cp2:1;
+		uint64_t reserved_31_29:3;
+		uint64_t raid_en:1;
+		uint64_t fus318:1;
+		uint64_t dorm_crypto:1;
+		uint64_t power_limit:2;
+		uint64_t rom_info:10;
+		uint64_t fus118:1;
+		uint64_t reserved_48_63:16;
+#endif
+	} cn70xx;
+	struct cvmx_mio_fus_dat2_cn70xx cn70xxp1;
+	struct cvmx_mio_fus_dat2_cn73xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_59_63:5;
+		uint64_t run_platform:3;
+		uint64_t gbl_pwr_throttle:8;
+		uint64_t fus118:1;
+		uint64_t rom_info:10;
+		uint64_t power_limit:2;
+		uint64_t dorm_crypto:1;
+		uint64_t fus318:1;
+		uint64_t raid_en:1;
+		uint64_t reserved_31_29:3;
+		uint64_t nodfa_cp2:1;
+		uint64_t nomul:1;
+		uint64_t nocrypto:1;
+		uint64_t reserved_25_24:2;
+		uint64_t chip_id:8;
+		uint64_t reserved_15_0:16;
+#else
+		uint64_t reserved_15_0:16;
+		uint64_t chip_id:8;
+		uint64_t reserved_25_24:2;
+		uint64_t nocrypto:1;
+		uint64_t nomul:1;
+		uint64_t nodfa_cp2:1;
+		uint64_t reserved_31_29:3;
+		uint64_t raid_en:1;
+		uint64_t fus318:1;
+		uint64_t dorm_crypto:1;
+		uint64_t power_limit:2;
+		uint64_t rom_info:10;
+		uint64_t fus118:1;
+		uint64_t gbl_pwr_throttle:8;
+		uint64_t run_platform:3;
+		uint64_t reserved_59_63:5;
+#endif
+	} cn73xx;
+	struct cvmx_mio_fus_dat2_cn78xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_59_63:5;
+		uint64_t run_platform:3;
+		uint64_t reserved_48_55:8;
+		uint64_t fus118:1;
+		uint64_t rom_info:10;
+		uint64_t power_limit:2;
+		uint64_t dorm_crypto:1;
+		uint64_t fus318:1;
+		uint64_t raid_en:1;
+		uint64_t reserved_31_29:3;
+		uint64_t nodfa_cp2:1;
+		uint64_t nomul:1;
+		uint64_t nocrypto:1;
+		uint64_t reserved_25_24:2;
+		uint64_t chip_id:8;
+		uint64_t reserved_0_15:16;
+#else
+		uint64_t reserved_0_15:16;
+		uint64_t chip_id:8;
+		uint64_t reserved_25_24:2;
+		uint64_t nocrypto:1;
+		uint64_t nomul:1;
+		uint64_t nodfa_cp2:1;
+		uint64_t reserved_31_29:3;
+		uint64_t raid_en:1;
+		uint64_t fus318:1;
+		uint64_t dorm_crypto:1;
+		uint64_t power_limit:2;
+		uint64_t rom_info:10;
+		uint64_t fus118:1;
+		uint64_t reserved_48_55:8;
+		uint64_t run_platform:3;
+		uint64_t reserved_59_63:5;
+#endif
+	} cn78xx;
+	struct cvmx_mio_fus_dat2_cn78xxp2 {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_59_63:5;
+		uint64_t run_platform:3;
+		uint64_t gbl_pwr_throttle:8;
+		uint64_t fus118:1;
+		uint64_t rom_info:10;
+		uint64_t power_limit:2;
+		uint64_t dorm_crypto:1;
+		uint64_t fus318:1;
+		uint64_t raid_en:1;
+		uint64_t reserved_31_29:3;
+		uint64_t nodfa_cp2:1;
+		uint64_t nomul:1;
+		uint64_t nocrypto:1;
+		uint64_t reserved_25_24:2;
+		uint64_t chip_id:8;
+		uint64_t reserved_0_15:16;
+#else
+		uint64_t reserved_0_15:16;
+		uint64_t chip_id:8;
+		uint64_t reserved_25_24:2;
+		uint64_t nocrypto:1;
+		uint64_t nomul:1;
+		uint64_t nodfa_cp2:1;
+		uint64_t reserved_31_29:3;
+		uint64_t raid_en:1;
+		uint64_t fus318:1;
+		uint64_t dorm_crypto:1;
+		uint64_t power_limit:2;
+		uint64_t rom_info:10;
+		uint64_t fus118:1;
+		uint64_t gbl_pwr_throttle:8;
+		uint64_t run_platform:3;
+		uint64_t reserved_59_63:5;
+#endif
+	} cn78xxp2;
 	struct cvmx_mio_fus_dat2_cn61xx cnf71xx;
+	struct cvmx_mio_fus_dat2_cn73xx cnf75xx;
 };
 
 union cvmx_mio_fus_dat3 {
 	uint64_t u64;
 	struct cvmx_mio_fus_dat3_s {
 #ifdef __BIG_ENDIAN_BITFIELD
-		uint64_t reserved_58_63:6;
+		uint64_t ema0:6;
 		uint64_t pll_ctl:10;
 		uint64_t dfa_info_dte:3;
 		uint64_t dfa_info_clm:4;
-		uint64_t reserved_40_40:1;
-		uint64_t ema:2;
+		uint64_t pll_alt_matrix:1;
+		uint64_t reserved_38_39:2;
 		uint64_t efus_lck_rsv:1;
 		uint64_t efus_lck_man:1;
 		uint64_t pll_half_dis:1;
 		uint64_t l2c_crip:3;
-		uint64_t pll_div4:1;
-		uint64_t reserved_29_30:2;
-		uint64_t bar2_en:1;
+		uint64_t reserved_28_31:4;
 		uint64_t efus_lck:1;
 		uint64_t efus_ign:1;
 		uint64_t nozip:1;
 		uint64_t nodfa_dte:1;
-		uint64_t icache:24;
+		uint64_t reserved_0_23:24;
 #else
-		uint64_t icache:24;
+		uint64_t reserved_0_23:24;
 		uint64_t nodfa_dte:1;
 		uint64_t nozip:1;
 		uint64_t efus_ign:1;
 		uint64_t efus_lck:1;
-		uint64_t bar2_en:1;
-		uint64_t reserved_29_30:2;
-		uint64_t pll_div4:1;
+		uint64_t reserved_28_31:4;
 		uint64_t l2c_crip:3;
 		uint64_t pll_half_dis:1;
 		uint64_t efus_lck_man:1;
 		uint64_t efus_lck_rsv:1;
-		uint64_t ema:2;
-		uint64_t reserved_40_40:1;
+		uint64_t reserved_38_39:2;
+		uint64_t pll_alt_matrix:1;
 		uint64_t dfa_info_clm:4;
 		uint64_t dfa_info_dte:3;
 		uint64_t pll_ctl:10;
-		uint64_t reserved_58_63:6;
+		uint64_t ema0:6;
 #endif
 	} s;
 	struct cvmx_mio_fus_dat3_cn30xx {
@@ -2022,7 +2168,239 @@
 	struct cvmx_mio_fus_dat3_cn61xx cn66xx;
 	struct cvmx_mio_fus_dat3_cn61xx cn68xx;
 	struct cvmx_mio_fus_dat3_cn61xx cn68xxp1;
+	struct cvmx_mio_fus_dat3_cn70xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t ema0:6;
+		uint64_t pll_ctl:10;
+		uint64_t dfa_info_dte:3;
+		uint64_t dfa_info_clm:4;
+		uint64_t pll_alt_matrix:1;
+		uint64_t pll_bwadj_denom:2;
+		uint64_t efus_lck_rsv:1;
+		uint64_t efus_lck_man:1;
+		uint64_t pll_half_dis:1;
+		uint64_t l2c_crip:3;
+		uint64_t use_int_refclk:1;
+		uint64_t zip_info:2;
+		uint64_t bar2_sz_conf:1;
+		uint64_t efus_lck:1;
+		uint64_t efus_ign:1;
+		uint64_t nozip:1;
+		uint64_t nodfa_dte:1;
+		uint64_t ema1:6;
+		uint64_t reserved_0_17:18;
+#else
+		uint64_t reserved_0_17:18;
+		uint64_t ema1:6;
+		uint64_t nodfa_dte:1;
+		uint64_t nozip:1;
+		uint64_t efus_ign:1;
+		uint64_t efus_lck:1;
+		uint64_t bar2_sz_conf:1;
+		uint64_t zip_info:2;
+		uint64_t use_int_refclk:1;
+		uint64_t l2c_crip:3;
+		uint64_t pll_half_dis:1;
+		uint64_t efus_lck_man:1;
+		uint64_t efus_lck_rsv:1;
+		uint64_t pll_bwadj_denom:2;
+		uint64_t pll_alt_matrix:1;
+		uint64_t dfa_info_clm:4;
+		uint64_t dfa_info_dte:3;
+		uint64_t pll_ctl:10;
+		uint64_t ema0:6;
+#endif
+	} cn70xx;
+	struct cvmx_mio_fus_dat3_cn70xxp1 {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t ema0:6;
+		uint64_t pll_ctl:10;
+		uint64_t dfa_info_dte:3;
+		uint64_t dfa_info_clm:4;
+		uint64_t reserved_38_40:3;
+		uint64_t efus_lck_rsv:1;
+		uint64_t efus_lck_man:1;
+		uint64_t pll_half_dis:1;
+		uint64_t l2c_crip:3;
+		uint64_t reserved_31_31:1;
+		uint64_t zip_info:2;
+		uint64_t bar2_sz_conf:1;
+		uint64_t efus_lck:1;
+		uint64_t efus_ign:1;
+		uint64_t nozip:1;
+		uint64_t nodfa_dte:1;
+		uint64_t ema1:6;
+		uint64_t reserved_0_17:18;
+#else
+		uint64_t reserved_0_17:18;
+		uint64_t ema1:6;
+		uint64_t nodfa_dte:1;
+		uint64_t nozip:1;
+		uint64_t efus_ign:1;
+		uint64_t efus_lck:1;
+		uint64_t bar2_sz_conf:1;
+		uint64_t zip_info:2;
+		uint64_t reserved_31_31:1;
+		uint64_t l2c_crip:3;
+		uint64_t pll_half_dis:1;
+		uint64_t efus_lck_man:1;
+		uint64_t efus_lck_rsv:1;
+		uint64_t reserved_38_40:3;
+		uint64_t dfa_info_clm:4;
+		uint64_t dfa_info_dte:3;
+		uint64_t pll_ctl:10;
+		uint64_t ema0:6;
+#endif
+	} cn70xxp1;
+	struct cvmx_mio_fus_dat3_cn73xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t ema0:6;
+		uint64_t pll_ctl:10;
+		uint64_t dfa_info_dte:3;
+		uint64_t dfa_info_clm:4;
+		uint64_t pll_alt_matrix:1;
+		uint64_t pll_bwadj_denom:2;
+		uint64_t efus_lck_rsv:1;
+		uint64_t efus_lck_man:1;
+		uint64_t pll_half_dis:1;
+		uint64_t l2c_crip:3;
+		uint64_t use_int_refclk:1;
+		uint64_t zip_info:2;
+		uint64_t bar2_sz_conf:1;
+		uint64_t efus_lck:1;
+		uint64_t efus_ign:1;
+		uint64_t nozip:1;
+		uint64_t nodfa_dte:1;
+		uint64_t ema1:6;
+		uint64_t nohna_dte:1;
+		uint64_t hna_info_dte:3;
+		uint64_t hna_info_clm:4;
+		uint64_t reserved_9_9:1;
+		uint64_t core_pll_mul:5;
+		uint64_t pnr_pll_mul:4;
+#else
+		uint64_t pnr_pll_mul:4;
+		uint64_t core_pll_mul:5;
+		uint64_t reserved_9_9:1;
+		uint64_t hna_info_clm:4;
+		uint64_t hna_info_dte:3;
+		uint64_t nohna_dte:1;
+		uint64_t ema1:6;
+		uint64_t nodfa_dte:1;
+		uint64_t nozip:1;
+		uint64_t efus_ign:1;
+		uint64_t efus_lck:1;
+		uint64_t bar2_sz_conf:1;
+		uint64_t zip_info:2;
+		uint64_t use_int_refclk:1;
+		uint64_t l2c_crip:3;
+		uint64_t pll_half_dis:1;
+		uint64_t efus_lck_man:1;
+		uint64_t efus_lck_rsv:1;
+		uint64_t pll_bwadj_denom:2;
+		uint64_t pll_alt_matrix:1;
+		uint64_t dfa_info_clm:4;
+		uint64_t dfa_info_dte:3;
+		uint64_t pll_ctl:10;
+		uint64_t ema0:6;
+#endif
+	} cn73xx;
+	struct cvmx_mio_fus_dat3_cn78xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t ema0:6;
+		uint64_t pll_ctl:10;
+		uint64_t dfa_info_dte:3;
+		uint64_t dfa_info_clm:4;
+		uint64_t reserved_38_40:3;
+		uint64_t efus_lck_rsv:1;
+		uint64_t efus_lck_man:1;
+		uint64_t pll_half_dis:1;
+		uint64_t l2c_crip:3;
+		uint64_t reserved_31_31:1;
+		uint64_t zip_info:2;
+		uint64_t bar2_sz_conf:1;
+		uint64_t efus_lck:1;
+		uint64_t efus_ign:1;
+		uint64_t nozip:1;
+		uint64_t nodfa_dte:1;
+		uint64_t ema1:6;
+		uint64_t nohna_dte:1;
+		uint64_t hna_info_dte:3;
+		uint64_t hna_info_clm:4;
+		uint64_t reserved_0_9:10;
+#else
+		uint64_t reserved_0_9:10;
+		uint64_t hna_info_clm:4;
+		uint64_t hna_info_dte:3;
+		uint64_t nohna_dte:1;
+		uint64_t ema1:6;
+		uint64_t nodfa_dte:1;
+		uint64_t nozip:1;
+		uint64_t efus_ign:1;
+		uint64_t efus_lck:1;
+		uint64_t bar2_sz_conf:1;
+		uint64_t zip_info:2;
+		uint64_t reserved_31_31:1;
+		uint64_t l2c_crip:3;
+		uint64_t pll_half_dis:1;
+		uint64_t efus_lck_man:1;
+		uint64_t efus_lck_rsv:1;
+		uint64_t reserved_38_40:3;
+		uint64_t dfa_info_clm:4;
+		uint64_t dfa_info_dte:3;
+		uint64_t pll_ctl:10;
+		uint64_t ema0:6;
+#endif
+	} cn78xx;
+	struct cvmx_mio_fus_dat3_cn73xx cn78xxp2;
 	struct cvmx_mio_fus_dat3_cn61xx cnf71xx;
+	struct cvmx_mio_fus_dat3_cnf75xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t ema0:6;
+		uint64_t pll_ctl:10;
+		uint64_t dfa_info_dte:3;
+		uint64_t dfa_info_clm:4;
+		uint64_t pll_alt_matrix:1;
+		uint64_t pll_bwadj_denom:2;
+		uint64_t efus_lck_rsv:1;
+		uint64_t efus_lck_man:1;
+		uint64_t pll_half_dis:1;
+		uint64_t l2c_crip:3;
+		uint64_t use_int_refclk:1;
+		uint64_t zip_info:2;
+		uint64_t bar2_sz_conf:1;
+		uint64_t efus_lck:1;
+		uint64_t efus_ign:1;
+		uint64_t nozip:1;
+		uint64_t nodfa_dte:1;
+		uint64_t ema1:6;
+		uint64_t reserved_9_17:9;
+		uint64_t core_pll_mul:5;
+		uint64_t pnr_pll_mul:4;
+#else
+		uint64_t pnr_pll_mul:4;
+		uint64_t core_pll_mul:5;
+		uint64_t reserved_9_17:9;
+		uint64_t ema1:6;
+		uint64_t nodfa_dte:1;
+		uint64_t nozip:1;
+		uint64_t efus_ign:1;
+		uint64_t efus_lck:1;
+		uint64_t bar2_sz_conf:1;
+		uint64_t zip_info:2;
+		uint64_t use_int_refclk:1;
+		uint64_t l2c_crip:3;
+		uint64_t pll_half_dis:1;
+		uint64_t efus_lck_man:1;
+		uint64_t efus_lck_rsv:1;
+		uint64_t pll_bwadj_denom:2;
+		uint64_t pll_alt_matrix:1;
+		uint64_t dfa_info_clm:4;
+		uint64_t dfa_info_dte:3;
+		uint64_t pll_ctl:10;
+		uint64_t ema0:6;
+#endif
+	} cnf75xx;
 };
 
 union cvmx_mio_fus_ema {
diff --git a/arch/mips/include/asm/octeon/cvmx-sysinfo.h b/arch/mips/include/asm/octeon/cvmx-sysinfo.h
index 2131197..c6c3ee3 100644
--- a/arch/mips/include/asm/octeon/cvmx-sysinfo.h
+++ b/arch/mips/include/asm/octeon/cvmx-sysinfo.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2008 Cavium Networks
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -32,6 +32,8 @@
 #ifndef __CVMX_SYSINFO_H__
 #define __CVMX_SYSINFO_H__
 
+#include "cvmx-coremask.h"
+
 #define OCTEON_SERIAL_LEN 20
 /**
  * Structure describing application specific information.
@@ -50,8 +52,7 @@
 	uint64_t system_dram_size;
 
 	/* ptr to memory descriptor block */
-	void *phy_mem_desc_ptr;
-
+	uint64_t phy_mem_desc_addr;
 
 	/* Application image specific variables */
 	/* stack top address (virtual) */
@@ -63,7 +64,7 @@
 	/* heap size in bytes */
 	uint32_t heap_size;
 	/* coremask defining cores running application */
-	uint32_t core_mask;
+	struct cvmx_coremask core_mask;
 	/* Deprecated, use cvmx_coremask_first_core() to select init core */
 	uint32_t init_core;
 
@@ -121,32 +122,4 @@
 
 extern struct cvmx_sysinfo *cvmx_sysinfo_get(void);
 
-/**
- * This function is used in non-simple executive environments (such as
- * Linux kernel, u-boot, etc.)	to configure the minimal fields that
- * are required to use simple executive files directly.
- *
- * Locking (if required) must be handled outside of this
- * function
- *
- * @phy_mem_desc_ptr: Pointer to global physical memory descriptor
- *		     (bootmem descriptor) @board_type: Octeon board
- *		     type enumeration
- *
- * @board_rev_major:
- *		     Board major revision
- * @board_rev_minor:
- *		     Board minor revision
- * @cpu_clock_hz:
- *		     CPU clock freqency in hertz
- *
- * Returns 0: Failure
- *	   1: success
- */
-extern int cvmx_sysinfo_minimal_initialize(void *phy_mem_desc_ptr,
-					   uint16_t board_type,
-					   uint8_t board_rev_major,
-					   uint8_t board_rev_minor,
-					   uint32_t cpu_clock_hz);
-
 #endif /* __CVMX_SYSINFO_H__ */
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 3e982e0..2530e87 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -57,6 +57,7 @@
 #include <asm/octeon/cvmx-sysinfo.h>
 
 #include <asm/octeon/cvmx-ciu-defs.h>
+#include <asm/octeon/cvmx-ciu3-defs.h>
 #include <asm/octeon/cvmx-gpio-defs.h>
 #include <asm/octeon/cvmx-iob-defs.h>
 #include <asm/octeon/cvmx-ipd-defs.h>
@@ -341,6 +342,21 @@
 	return core_num;
 }
 
+/* Number of low core-number bits that select the core within its node */
+#define CVMX_NODE_NO_SHIFT	7
+#define CVMX_NODE_MASK		0x3	/* node number is 2 bits wide */
+static inline unsigned int cvmx_get_node_num(void)
+{
+	unsigned int core_num = cvmx_get_core_num();
+
+	return (core_num >> CVMX_NODE_NO_SHIFT) & CVMX_NODE_MASK;
+}
+
+static inline unsigned int cvmx_get_local_core_num(void)
+{
+	return cvmx_get_core_num() & ((1 << CVMX_NODE_NO_SHIFT) - 1);
+}
+
 /**
  * Returns the number of bits set in the provided value.
  * Simple wrapper for POP instruction.
@@ -448,8 +464,15 @@
 /* Return the number of cores available in the chip */
 static inline uint32_t cvmx_octeon_num_cores(void)
 {
-	uint32_t ciu_fuse = (uint32_t) cvmx_read_csr(CVMX_CIU_FUSE) & 0xffff;
-	return cvmx_pop(ciu_fuse);
+	u64 ciu_fuse_reg;
+	u64 ciu_fuse;
+
+	if (OCTEON_IS_OCTEON3() && !OCTEON_IS_MODEL(OCTEON_CN70XX))
+		ciu_fuse_reg = CVMX_CIU3_FUSE;
+	else
+		ciu_fuse_reg = CVMX_CIU_FUSE;
+	ciu_fuse = cvmx_read_csr(ciu_fuse_reg);
+	return cvmx_dpop(ciu_fuse);
 }
 
 #endif /*  __CVMX_H__  */
diff --git a/arch/mips/include/asm/octeon/octeon-feature.h b/arch/mips/include/asm/octeon/octeon-feature.h
index 3ed10a8..a19ca3b 100644
--- a/arch/mips/include/asm/octeon/octeon-feature.h
+++ b/arch/mips/include/asm/octeon/octeon-feature.h
@@ -81,6 +81,10 @@
 	OCTEON_FEATURE_HFA,
 	OCTEON_FEATURE_DFM,
 	OCTEON_FEATURE_CIU2,
+	OCTEON_FEATURE_CIU3,
+	/* Octeon has FPA first seen on 78XX */
+	OCTEON_FEATURE_FPA3,
+	OCTEON_FEATURE_FAU,
 	OCTEON_MAX_FEATURE
 };
 
@@ -110,7 +114,7 @@
  * Returns Non zero if the feature exists. Zero if the feature does not
  *	   exist.
  */
-static inline int octeon_has_feature(enum octeon_feature feature)
+static inline bool octeon_has_feature(enum octeon_feature feature)
 {
 	switch (feature) {
 	case OCTEON_FEATURE_SAAD:
@@ -122,7 +126,7 @@
 			fus_2.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT2);
 			return !fus_2.s.nocrypto && !fus_2.s.nomul && fus_2.s.dorm_crypto;
 		} else {
-			return 0;
+			return false;
 		}
 
 	case OCTEON_FEATURE_PCIE:
@@ -190,11 +194,20 @@
 
 	case OCTEON_FEATURE_CIU2:
 		return OCTEON_IS_MODEL(OCTEON_CN68XX);
+	case OCTEON_FEATURE_CIU3:
+	case OCTEON_FEATURE_FPA3:
+		return OCTEON_IS_MODEL(OCTEON_CN78XX)
+			|| OCTEON_IS_MODEL(OCTEON_CNF75XX)
+			|| OCTEON_IS_MODEL(OCTEON_CN73XX);
+	case OCTEON_FEATURE_FAU:
+		return !(OCTEON_IS_MODEL(OCTEON_CN78XX)
+			 || OCTEON_IS_MODEL(OCTEON_CNF75XX)
+			 || OCTEON_IS_MODEL(OCTEON_CN73XX));
 
 	default:
 		break;
 	}
-	return 0;
+	return false;
 }
 
 #endif /* __OCTEON_FEATURE_H__ */
diff --git a/arch/mips/include/asm/octeon/octeon-model.h b/arch/mips/include/asm/octeon/octeon-model.h
index 92b377e..6c68517 100644
--- a/arch/mips/include/asm/octeon/octeon-model.h
+++ b/arch/mips/include/asm/octeon/octeon-model.h
@@ -74,7 +74,12 @@
  * CN7XXX models with new revision encoding
  */
 
+#define OCTEON_CNF75XX_PASS1_0	0x000d9800
+#define OCTEON_CNF75XX		(OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CNF75XX_PASS1_X	(OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
+
 #define OCTEON_CN73XX_PASS1_0	0x000d9700
+#define OCTEON_CN73XX_PASS1_1	0x000d9701
 #define OCTEON_CN73XX		(OCTEON_CN73XX_PASS1_0 | OM_IGNORE_REVISION)
 #define OCTEON_CN73XX_PASS1_X	(OCTEON_CN73XX_PASS1_0 | \
 				 OM_IGNORE_MINOR_REVISION)
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index de9f74e..07c0516 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -299,6 +299,31 @@
 	cvmx_read64_uint32(address ^ 4);
 }
 
+#ifdef CONFIG_SMP
+void octeon_setup_smp(void);
+#else
+static inline void octeon_setup_smp(void) {}
+#endif
+
+struct irq_domain;
+struct device_node;
+struct irq_data;
+struct irq_chip;
+void octeon_ciu3_mbox_send(int cpu, unsigned int mbox);
+int octeon_irq_ciu3_xlat(struct irq_domain *d,
+			 struct device_node *node,
+			 const u32 *intspec,
+			 unsigned int intsize,
+			 unsigned long *out_hwirq,
+			 unsigned int *out_type);
+void octeon_irq_ciu3_enable(struct irq_data *data);
+void octeon_irq_ciu3_disable(struct irq_data *data);
+void octeon_irq_ciu3_ack(struct irq_data *data);
+void octeon_irq_ciu3_mask(struct irq_data *data);
+void octeon_irq_ciu3_mask_ack(struct irq_data *data);
+int octeon_irq_ciu3_mapx(struct irq_domain *d, unsigned int virq,
+			 irq_hw_number_t hw, struct irq_chip *chip);
+
 /* Octeon multiplier save/restore routines from octeon_switch.S */
 void octeon_mult_save(void);
 void octeon_mult_restore(void);
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 8c16fb7..86b239d 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -43,8 +43,6 @@
 	   and XFree86. Eventually will be removed. */
 	unsigned int need_domain_info;
 
-	int iommu;
-
 	/* Optional access methods for reading/writing the bus number
 	   of the PCI controller */
 	int (*get_busno)(void);
@@ -106,11 +104,11 @@
 struct pci_dev;
 
 /*
- * The PCI address space does equal the physical memory address space.	The
- * networking and block device layers use this boolean for bounce buffer
- * decisions.  This is set if any hose does not have an IOMMU.
+ * The PCI address space does equal the physical memory address space.
+ * The networking and block device layers use this boolean for bounce
+ * buffer decisions.
  */
-extern unsigned int PCI_DMA_BUS_IS_PHYS;
+#define PCI_DMA_BUS_IS_PHYS     (1)
 
 #ifdef CONFIG_PCI_DOMAINS
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index 832e216..d21f3da 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -103,8 +103,8 @@
 	pmd_val(*pmdp) = ((unsigned long) invalid_pte_table);
 }
 
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
-#define pte_page(x)		pfn_to_page(pte_pfn(x))
+#if defined(CONFIG_XPA)
+
 #define pte_pfn(x)		(((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))
 static inline pte_t
 pfn_pte(unsigned long pfn, pgprot_t prot)
@@ -118,9 +118,21 @@
 	return pte;
 }
 
-#else
+#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 
-#define pte_page(x)		pfn_to_page(pte_pfn(x))
+#define pte_pfn(x)		((unsigned long)((x).pte_high >> 6))
+
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+	/* High word: pfn above six low prot bits; low word: all of prot. */
+	pte_t entry;
+
+	entry.pte_low = pgprot_val(prot);
+	entry.pte_high = (pfn << 6) | (pgprot_val(prot) & 0x3f);
+	return entry;
+}
+
+#else
 
 #ifdef CONFIG_CPU_VR41XX
 #define pte_pfn(x)		((unsigned long)((x).pte >> (PAGE_SHIFT + 2)))
@@ -131,6 +143,8 @@
 #endif
 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */
 
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+
 #define __pgd_offset(address)	pgd_index(address)
 #define __pud_offset(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define __pmd_offset(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
@@ -166,7 +180,7 @@
 
 #else
 
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+#if defined(CONFIG_XPA)
 
 /* Swap entries must have VALID and GLOBAL bits cleared. */
 #define __swp_type(x)			(((x).val >> 4) & 0x1f)
@@ -175,6 +189,15 @@
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t) { 0, (x).val })
 
+#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+
+/* Swap entries must have VALID and GLOBAL bits cleared. */
+#define __swp_type(x)			(((x).val >> 2) & 0x1f)
+#define __swp_offset(x)			 ((x).val >> 7)
+#define __swp_entry(type, offset)	((swp_entry_t)  { ((type) << 2) | ((offset) << 7) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_high })
+#define __swp_entry_to_pte(x)		((pte_t) { 0, (x).val })
+
 #else
 /*
  * Constraints:
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index cf661a2..514cbc0 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -17,7 +17,7 @@
 #include <asm/cachectl.h>
 #include <asm/fixmap.h>
 
-#ifdef CONFIG_PAGE_SIZE_64KB
+#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
 #include <asm-generic/pgtable-nopmd.h>
 #else
 #include <asm-generic/pgtable-nopud.h>
@@ -90,7 +90,11 @@
 #define PTE_ORDER		0
 #endif
 #ifdef CONFIG_PAGE_SIZE_16KB
-#define PGD_ORDER		0
+#ifdef CONFIG_MIPS_VA_BITS_48
+#define PGD_ORDER               1
+#else
+#define PGD_ORDER               0
+#endif
 #define PUD_ORDER		aieeee_attempt_to_allocate_pud
 #define PMD_ORDER		0
 #define PTE_ORDER		0
@@ -104,7 +108,11 @@
 #ifdef CONFIG_PAGE_SIZE_64KB
 #define PGD_ORDER		0
 #define PUD_ORDER		aieeee_attempt_to_allocate_pud
+#ifdef CONFIG_MIPS_VA_BITS_48
+#define PMD_ORDER		0
+#else
 #define PMD_ORDER		aieeee_attempt_to_allocate_pmd
+#endif
 #define PTE_ORDER		0
 #endif
 
@@ -114,11 +122,7 @@
 #endif
 #define PTRS_PER_PTE	((PAGE_SIZE << PTE_ORDER) / sizeof(pte_t))
 
-#if PGDIR_SIZE >= TASK_SIZE64
-#define USER_PTRS_PER_PGD	(1)
-#else
-#define USER_PTRS_PER_PGD	(TASK_SIZE64 / PGDIR_SIZE)
-#endif
+#define USER_PTRS_PER_PGD	((TASK_SIZE64 / PGDIR_SIZE) ?: 1)
 #define FIRST_USER_ADDRESS	0UL
 
 /*
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index 97b3138..f88a48c 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -32,149 +32,132 @@
  * unpredictable things.  The code (when it is written) to deal with
  * this problem will be in the update_mmu_cache() code for the r4k.
  */
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+#if defined(CONFIG_XPA)
 
 /*
- * The following bits are implemented by the TLB hardware
+ * Page table bit offsets used for 64 bit physical addressing on
+ * MIPS32r5 with XPA.
  */
-#define _PAGE_NO_EXEC_SHIFT	0
-#define _PAGE_NO_EXEC		(1 << _PAGE_NO_EXEC_SHIFT)
-#define _PAGE_NO_READ_SHIFT	(_PAGE_NO_EXEC_SHIFT + 1)
-#define _PAGE_NO_READ		(1 << _PAGE_NO_READ_SHIFT)
-#define _PAGE_GLOBAL_SHIFT	(_PAGE_NO_READ_SHIFT + 1)
-#define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
-#define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
-#define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
-#define _PAGE_DIRTY_SHIFT	(_PAGE_VALID_SHIFT + 1)
-#define _PAGE_DIRTY		(1 << _PAGE_DIRTY_SHIFT)
-#define _CACHE_SHIFT		(_PAGE_DIRTY_SHIFT + 1)
-#define _CACHE_MASK		(7 << _CACHE_SHIFT)
+enum pgtable_bits {
+	/* Used by TLB hardware (placed in EntryLo*) */
+	_PAGE_NO_EXEC_SHIFT,
+	_PAGE_NO_READ_SHIFT,
+	_PAGE_GLOBAL_SHIFT,
+	_PAGE_VALID_SHIFT,
+	_PAGE_DIRTY_SHIFT,
+	_CACHE_SHIFT,
 
-/*
- * The following bits are implemented in software
- */
-#define _PAGE_PRESENT_SHIFT	(24)
-#define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
-#define _PAGE_READ_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
-#define _PAGE_READ		(1 << _PAGE_READ_SHIFT)
-#define _PAGE_WRITE_SHIFT	(_PAGE_READ_SHIFT + 1)
-#define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
-#define _PAGE_ACCESSED_SHIFT	(_PAGE_WRITE_SHIFT + 1)
-#define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
-#define _PAGE_MODIFIED_SHIFT	(_PAGE_ACCESSED_SHIFT + 1)
-#define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
-
-#define _PFN_SHIFT		(PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
+	/* Used only by software (masked out before writing EntryLo*) */
+	_PAGE_PRESENT_SHIFT = 24,
+	_PAGE_WRITE_SHIFT,
+	_PAGE_ACCESSED_SHIFT,
+	_PAGE_MODIFIED_SHIFT,
+};
 
 /*
  * Bits for extended EntryLo0/EntryLo1 registers
  */
 #define _PFNX_MASK		0xffffff
 
+#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+
+/*
+ * Page table bit offsets used for 36 bit physical addressing on MIPS32,
+ * for example with Alchemy or Netlogic XLP/XLR.
+ */
+enum pgtable_bits {
+	/* Used by TLB hardware (placed in EntryLo*) */
+	_PAGE_GLOBAL_SHIFT,
+	_PAGE_VALID_SHIFT,
+	_PAGE_DIRTY_SHIFT,
+	_CACHE_SHIFT,
+
+	/* Used only by software (masked out before writing EntryLo*) */
+	_PAGE_PRESENT_SHIFT = _CACHE_SHIFT + 3,
+	_PAGE_NO_READ_SHIFT,
+	_PAGE_WRITE_SHIFT,
+	_PAGE_ACCESSED_SHIFT,
+	_PAGE_MODIFIED_SHIFT,
+};
+
 #elif defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 
-/*
- * The following bits are implemented in software
- */
-#define _PAGE_PRESENT_SHIFT	(0)
-#define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
-#define _PAGE_READ_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
-#define _PAGE_READ		(1 << _PAGE_READ_SHIFT)
-#define _PAGE_WRITE_SHIFT	(_PAGE_READ_SHIFT + 1)
-#define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
-#define _PAGE_ACCESSED_SHIFT	(_PAGE_WRITE_SHIFT + 1)
-#define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
-#define _PAGE_MODIFIED_SHIFT	(_PAGE_ACCESSED_SHIFT + 1)
-#define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
+/* Page table bits used for r3k systems */
+enum pgtable_bits {
+	/* Used only by software (writes to EntryLo ignored) */
+	_PAGE_PRESENT_SHIFT,
+	_PAGE_NO_READ_SHIFT,
+	_PAGE_WRITE_SHIFT,
+	_PAGE_ACCESSED_SHIFT,
+	_PAGE_MODIFIED_SHIFT,
 
-/*
- * The following bits are implemented by the TLB hardware
- */
-#define _PAGE_GLOBAL_SHIFT	(_PAGE_MODIFIED_SHIFT + 4)
-#define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
-#define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
-#define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
-#define _PAGE_DIRTY_SHIFT	(_PAGE_VALID_SHIFT + 1)
-#define _PAGE_DIRTY		(1 << _PAGE_DIRTY_SHIFT)
-#define _CACHE_UNCACHED_SHIFT	(_PAGE_DIRTY_SHIFT + 1)
-#define _CACHE_UNCACHED		(1 << _CACHE_UNCACHED_SHIFT)
-#define _CACHE_MASK		_CACHE_UNCACHED
-
-#define _PFN_SHIFT		PAGE_SHIFT
+	/* Used by TLB hardware (placed in EntryLo) */
+	_PAGE_GLOBAL_SHIFT = 8,
+	_PAGE_VALID_SHIFT,
+	_PAGE_DIRTY_SHIFT,
+	_CACHE_UNCACHED_SHIFT,
+};
 
 #else
-/*
- * Below are the "Normal" R4K cases
- */
 
-/*
- * The following bits are implemented in software
- */
-#define _PAGE_PRESENT_SHIFT	0
-#define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
-/* R2 or later cores check for RI/XI support to determine _PAGE_READ */
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
-#define _PAGE_WRITE_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
-#define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
-#else
-#define _PAGE_READ_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
-#define _PAGE_READ		(1 << _PAGE_READ_SHIFT)
-#define _PAGE_WRITE_SHIFT	(_PAGE_READ_SHIFT + 1)
-#define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
+/* Page table bits used for r4k systems */
+enum pgtable_bits {
+	/* Used only by software (masked out before writing EntryLo*) */
+	_PAGE_PRESENT_SHIFT,
+#if !defined(CONFIG_CPU_HAS_RIXI)
+	_PAGE_NO_READ_SHIFT,
 #endif
-#define _PAGE_ACCESSED_SHIFT	(_PAGE_WRITE_SHIFT + 1)
-#define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
-#define _PAGE_MODIFIED_SHIFT	(_PAGE_ACCESSED_SHIFT + 1)
-#define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
-
+	_PAGE_WRITE_SHIFT,
+	_PAGE_ACCESSED_SHIFT,
+	_PAGE_MODIFIED_SHIFT,
 #if defined(CONFIG_64BIT) && defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
-/* Huge TLB page */
-#define _PAGE_HUGE_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
-#define _PAGE_HUGE		(1 << _PAGE_HUGE_SHIFT)
-#endif	/* CONFIG_64BIT && CONFIG_MIPS_HUGE_TLB_SUPPORT */
-
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
-/* XI - page cannot be executed */
-#ifdef _PAGE_HUGE_SHIFT
-#define _PAGE_NO_EXEC_SHIFT	(_PAGE_HUGE_SHIFT + 1)
-#else
-#define _PAGE_NO_EXEC_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
+	_PAGE_HUGE_SHIFT,
 #endif
-#define _PAGE_NO_EXEC		(cpu_has_rixi ? (1 << _PAGE_NO_EXEC_SHIFT) : 0)
 
-/* RI - page cannot be read */
-#define _PAGE_READ_SHIFT	(_PAGE_NO_EXEC_SHIFT + 1)
-#define _PAGE_READ		(cpu_has_rixi ? 0 : (1 << _PAGE_READ_SHIFT))
-#define _PAGE_NO_READ_SHIFT	_PAGE_READ_SHIFT
-#define _PAGE_NO_READ		(cpu_has_rixi ? (1 << _PAGE_READ_SHIFT) : 0)
-#endif	/* defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) */
-
-#if defined(_PAGE_NO_READ_SHIFT)
-#define _PAGE_GLOBAL_SHIFT	(_PAGE_NO_READ_SHIFT + 1)
-#elif defined(_PAGE_HUGE_SHIFT)
-#define _PAGE_GLOBAL_SHIFT	(_PAGE_HUGE_SHIFT + 1)
-#else
-#define _PAGE_GLOBAL_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
+	/* Used by TLB hardware (placed in EntryLo*) */
+#if defined(CONFIG_CPU_HAS_RIXI)
+	_PAGE_NO_EXEC_SHIFT,
+	_PAGE_NO_READ_SHIFT,
 #endif
-#define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
-
-#define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
-#define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
-#define _PAGE_DIRTY_SHIFT	(_PAGE_VALID_SHIFT + 1)
-#define _PAGE_DIRTY		(1 << _PAGE_DIRTY_SHIFT)
-#define _CACHE_SHIFT		(_PAGE_DIRTY_SHIFT + 1)
-#define _CACHE_MASK		(7 << _CACHE_SHIFT)
-
-#define _PFN_SHIFT		(PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
+	_PAGE_GLOBAL_SHIFT,
+	_PAGE_VALID_SHIFT,
+	_PAGE_DIRTY_SHIFT,
+	_CACHE_SHIFT,
+};
 
 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT && defined(CONFIG_CPU_MIPS32) */
 
+/* Used only by software */
+#define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
+#define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
+#define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
+#if defined(CONFIG_64BIT) && defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
+# define _PAGE_HUGE		(1 << _PAGE_HUGE_SHIFT)
+#endif
+
+/* Used by TLB hardware (placed in EntryLo*) */
+#if defined(CONFIG_XPA)
+# define _PAGE_NO_EXEC		(1 << _PAGE_NO_EXEC_SHIFT)
+#elif defined(CONFIG_CPU_HAS_RIXI)
+# define _PAGE_NO_EXEC		(cpu_has_rixi ? (1 << _PAGE_NO_EXEC_SHIFT) : 0)
+#endif
+#define _PAGE_NO_READ		(1 << _PAGE_NO_READ_SHIFT)
+#define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
+#define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
+#define _PAGE_DIRTY		(1 << _PAGE_DIRTY_SHIFT)
+#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+# define _CACHE_UNCACHED	(1 << _CACHE_UNCACHED_SHIFT)
+# define _CACHE_MASK		_CACHE_UNCACHED
+# define _PFN_SHIFT		PAGE_SHIFT
+#else
+# define _CACHE_MASK		(7 << _CACHE_SHIFT)
+# define _PFN_SHIFT		(PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
+#endif
+
 #ifndef _PAGE_NO_EXEC
 #define _PAGE_NO_EXEC		0
 #endif
-#ifndef _PAGE_NO_READ
-#define _PAGE_NO_READ		0
-#endif
 
 #define _PAGE_SILENT_READ	_PAGE_VALID
 #define _PAGE_SILENT_WRITE	_PAGE_DIRTY
@@ -191,14 +174,13 @@
  */
 
 
-#ifndef __ASSEMBLY__
 /*
  * pte_to_entrylo converts a page table entry (PTE) into a Mips
  * entrylo0/1 value.
  */
 static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 {
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
+#ifdef CONFIG_CPU_HAS_RIXI
 	if (cpu_has_rixi) {
 		int sa;
 #ifdef CONFIG_32BIT
@@ -218,7 +200,6 @@
 
 	return pte_val >> _PAGE_GLOBAL_SHIFT;
 }
-#endif
 
 /*
  * Cache attributes
@@ -274,7 +255,7 @@
 #define _CACHE_UNCACHED_ACCELERATED	(7<<_CACHE_SHIFT)
 #endif
 
-#define __READABLE	(_PAGE_SILENT_READ | _PAGE_READ | _PAGE_ACCESSED)
+#define __READABLE	(_PAGE_SILENT_READ | _PAGE_ACCESSED)
 #define __WRITEABLE	(_PAGE_SILENT_WRITE | _PAGE_WRITE | _PAGE_MODIFIED)
 
 #define _PAGE_CHG_MASK	(_PAGE_ACCESSED | _PAGE_MODIFIED |	\
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index f53a7e3a4..a6b611f 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -23,18 +23,19 @@
 struct mm_struct;
 struct vm_area_struct;
 
-#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _CACHE_CACHABLE_NONCOHERENT)
-#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_READ | \
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
+				 _CACHE_CACHABLE_NONCOHERENT)
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
 				 _page_cachable_default)
-#define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_NO_EXEC | \
+#define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \
 				 _page_cachable_default)
-#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | \
 				 _page_cachable_default)
 #define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
 				 _PAGE_GLOBAL | _page_cachable_default)
 #define PAGE_KERNEL_NC	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
 				 _PAGE_GLOBAL | _CACHE_CACHABLE_NONCOHERENT)
-#define PAGE_USERIO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define PAGE_USERIO	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
 				 _page_cachable_default)
 #define PAGE_KERNEL_UNCACHED __pgprot(_PAGE_PRESENT | __READABLE | \
 			__WRITEABLE | _PAGE_GLOBAL | _CACHE_UNCACHED)
@@ -127,10 +128,19 @@
 	}								\
 } while(0)
 
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pteval);
+
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 
-#define pte_none(pte)		(!(((pte).pte_high) & ~_PAGE_GLOBAL))
+#ifdef CONFIG_XPA
+# define pte_none(pte)		(!(((pte).pte_high) & ~_PAGE_GLOBAL))
+#else
+# define pte_none(pte)		(!(((pte).pte_low | (pte).pte_high) & ~_PAGE_GLOBAL))
+#endif
+
 #define pte_present(pte)	((pte).pte_low & _PAGE_PRESENT)
+#define pte_no_exec(pte)	((pte).pte_low & _PAGE_NO_EXEC)
 
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
@@ -138,17 +148,23 @@
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
 
+#ifdef CONFIG_XPA
 	if (pte.pte_high & _PAGE_GLOBAL) {
+#else
+	if (pte.pte_low & _PAGE_GLOBAL) {
+#endif
 		pte_t *buddy = ptep_buddy(ptep);
 		/*
 		 * Make sure the buddy is global too (if it's !none,
 		 * it better already be global)
 		 */
-		if (pte_none(*buddy))
+		if (pte_none(*buddy)) {
+			if (!config_enabled(CONFIG_XPA))
+				buddy->pte_low |= _PAGE_GLOBAL;
 			buddy->pte_high |= _PAGE_GLOBAL;
+		}
 	}
 }
-#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
@@ -156,8 +172,13 @@
 
 	htw_stop();
 	/* Preserve global status for the pair */
-	if (ptep_buddy(ptep)->pte_high & _PAGE_GLOBAL)
-		null.pte_high = _PAGE_GLOBAL;
+	if (config_enabled(CONFIG_XPA)) {
+		if (ptep_buddy(ptep)->pte_high & _PAGE_GLOBAL)
+			null.pte_high = _PAGE_GLOBAL;
+	} else {
+		if (ptep_buddy(ptep)->pte_low & _PAGE_GLOBAL)
+			null.pte_low = null.pte_high = _PAGE_GLOBAL;
+	}
 
 	set_pte_at(mm, addr, ptep, null);
 	htw_start();
@@ -166,6 +187,7 @@
 
 #define pte_none(pte)		(!(pte_val(pte) & ~_PAGE_GLOBAL))
 #define pte_present(pte)	(pte_val(pte) & _PAGE_PRESENT)
+#define pte_no_exec(pte)	(pte_val(pte) & _PAGE_NO_EXEC)
 
 /*
  * Certain architectures need to do special things when pte's
@@ -187,30 +209,42 @@
 		 * For SMP, multiple CPUs can race, so we need to do
 		 * this atomically.
 		 */
-#ifdef CONFIG_64BIT
-#define LL_INSN "lld"
-#define SC_INSN "scd"
-#else /* CONFIG_32BIT */
-#define LL_INSN "ll"
-#define SC_INSN "sc"
-#endif
 		unsigned long page_global = _PAGE_GLOBAL;
 		unsigned long tmp;
 
-		__asm__ __volatile__ (
-			"	.set	push\n"
-			"	.set	noreorder\n"
-			"1:	" LL_INSN "	%[tmp], %[buddy]\n"
-			"	bnez	%[tmp], 2f\n"
-			"	 or	%[tmp], %[tmp], %[global]\n"
-			"	" SC_INSN "	%[tmp], %[buddy]\n"
-			"	beqz	%[tmp], 1b\n"
-			"	 nop\n"
-			"2:\n"
-			"	.set pop"
-			: [buddy] "+m" (buddy->pte),
-			  [tmp] "=&r" (tmp)
+		if (kernel_uses_llsc && R10000_LLSC_WAR) {
+			__asm__ __volatile__ (
+			"	.set	arch=r4000			\n"
+			"	.set	push				\n"
+			"	.set	noreorder			\n"
+			"1:"	__LL	"%[tmp], %[buddy]		\n"
+			"	bnez	%[tmp], 2f			\n"
+			"	 or	%[tmp], %[tmp], %[global]	\n"
+				__SC	"%[tmp], %[buddy]		\n"
+			"	beqzl	%[tmp], 1b			\n"
+			"	nop					\n"
+			"2:						\n"
+			"	.set	pop				\n"
+			"	.set	mips0				\n"
+			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 			: [global] "r" (page_global));
+		} else if (kernel_uses_llsc) {
+			__asm__ __volatile__ (
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
+			"	.set	push				\n"
+			"	.set	noreorder			\n"
+			"1:"	__LL	"%[tmp], %[buddy]		\n"
+			"	bnez	%[tmp], 2f			\n"
+			"	 or	%[tmp], %[tmp], %[global]	\n"
+				__SC	"%[tmp], %[buddy]		\n"
+			"	beqz	%[tmp], 1b			\n"
+			"	nop					\n"
+			"2:						\n"
+			"	.set	pop				\n"
+			"	.set	mips0				\n"
+			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
+			: [global] "r" (page_global));
+		}
 #else /* !CONFIG_SMP */
 		if (pte_none(*buddy))
 			pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL;
@@ -218,7 +252,6 @@
 	}
 #endif
 }
-#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
@@ -234,6 +267,22 @@
 }
 #endif
 
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pteval)
+{
+	extern void __update_cache(unsigned long address, pte_t pte);
+
+	/*
+	 * Only sync the cache when a present PTE is installed that maps a
+	 * different page frame than the present entry it replaces (if any).
+	 */
+	if (pte_present(pteval) &&
+	    !(pte_present(*ptep) && pte_pfn(*ptep) == pte_pfn(pteval)))
+		__update_cache(addr, pteval);
+
+	set_pte(ptep, pteval);
+}
+
 /*
  * (pmds are folded into puds so this doesn't get actually called,
  * but the define is needed for a generic inline function.)
@@ -270,6 +319,8 @@
 static inline pte_t pte_wrprotect(pte_t pte)
 {
 	pte.pte_low  &= ~_PAGE_WRITE;
+	if (!config_enabled(CONFIG_XPA))
+		pte.pte_low &= ~_PAGE_SILENT_WRITE;
 	pte.pte_high &= ~_PAGE_SILENT_WRITE;
 	return pte;
 }
@@ -277,6 +328,8 @@
 static inline pte_t pte_mkclean(pte_t pte)
 {
 	pte.pte_low  &= ~_PAGE_MODIFIED;
+	if (!config_enabled(CONFIG_XPA))
+		pte.pte_low &= ~_PAGE_SILENT_WRITE;
 	pte.pte_high &= ~_PAGE_SILENT_WRITE;
 	return pte;
 }
@@ -284,6 +337,8 @@
 static inline pte_t pte_mkold(pte_t pte)
 {
 	pte.pte_low  &= ~_PAGE_ACCESSED;
+	if (!config_enabled(CONFIG_XPA))
+		pte.pte_low &= ~_PAGE_SILENT_READ;
 	pte.pte_high &= ~_PAGE_SILENT_READ;
 	return pte;
 }
@@ -291,24 +346,33 @@
 static inline pte_t pte_mkwrite(pte_t pte)
 {
 	pte.pte_low |= _PAGE_WRITE;
-	if (pte.pte_low & _PAGE_MODIFIED)
+	if (pte.pte_low & _PAGE_MODIFIED) {
+		if (!config_enabled(CONFIG_XPA))
+			pte.pte_low |= _PAGE_SILENT_WRITE;
 		pte.pte_high |= _PAGE_SILENT_WRITE;
+	}
 	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
 	pte.pte_low |= _PAGE_MODIFIED;
-	if (pte.pte_low & _PAGE_WRITE)
+	if (pte.pte_low & _PAGE_WRITE) {
+		if (!config_enabled(CONFIG_XPA))
+			pte.pte_low |= _PAGE_SILENT_WRITE;
 		pte.pte_high |= _PAGE_SILENT_WRITE;
+	}
 	return pte;
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
 	pte.pte_low |= _PAGE_ACCESSED;
-	if (pte.pte_low & _PAGE_READ)
+	if (!(pte.pte_low & _PAGE_NO_READ)) {
+		if (!config_enabled(CONFIG_XPA))
+			pte.pte_low |= _PAGE_SILENT_READ;
 		pte.pte_high |= _PAGE_SILENT_READ;
+	}
 	return pte;
 }
 #else
@@ -353,13 +417,8 @@
 static inline pte_t pte_mkyoung(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_ACCESSED;
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	if (!(pte_val(pte) & _PAGE_NO_READ))
 		pte_val(pte) |= _PAGE_SILENT_READ;
-	else
-#endif
-	if (pte_val(pte) & _PAGE_READ)
-		pte_val(pte) |= _PAGE_SILENT_READ;
 	return pte;
 }
 
@@ -411,7 +470,7 @@
  */
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
 
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+#if defined(CONFIG_XPA)
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	pte.pte_low  &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
@@ -420,6 +479,15 @@
 	pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK;
 	return pte;
 }
+#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	/* Low keeps _PAGE_CHG_MASK, high keeps PFN/cache; rest from newprot. */
+	pte.pte_low = (pte.pte_low & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	pte.pte_high = (pte.pte_high & (_PFN_MASK | _CACHE_MASK)) |
+		       (pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK));
+	return pte;
+}
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
@@ -430,15 +498,12 @@
 
 extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
 	pte_t pte);
-extern void __update_cache(struct vm_area_struct *vma, unsigned long address,
-	pte_t pte);
 
 static inline void update_mmu_cache(struct vm_area_struct *vma,
 	unsigned long address, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 	__update_tlb(vma, address, pte);
-	__update_cache(vma, address, pte);
 }
 
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
@@ -543,13 +608,8 @@
 {
 	pmd_val(pmd) |= _PAGE_ACCESSED;
 
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	if (!(pmd_val(pmd) & _PAGE_NO_READ))
 		pmd_val(pmd) |= _PAGE_SILENT_READ;
-	else
-#endif
-	if (pmd_val(pmd) & _PAGE_READ)
-		pmd_val(pmd) |= _PAGE_SILENT_READ;
 
 	return pmd;
 }
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 041153f..7e78b62 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -63,7 +63,11 @@
  * 8192EB ...
  */
 #define TASK_SIZE32	0x7fff8000UL
-#define TASK_SIZE64	0x10000000000UL
+#ifndef CONFIG_MIPS_VA_BITS_48
+#define TASK_SIZE64	0x10000000000UL
+#else
+#define TASK_SIZE64	(0x1UL << (cpu_data[0].vmbits > 48 ? 48 : cpu_data[0].vmbits))
+#endif
 #define TASK_SIZE (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE64)
 #define STACK_TOP_MAX	TASK_SIZE64
 
@@ -355,6 +359,10 @@
  */
 extern void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp);
 
+static inline void flush_thread(void)
+{
+}
+
 unsigned long get_wchan(struct task_struct *p);
 
 #define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \
diff --git a/arch/mips/include/asm/seccomp.h b/arch/mips/include/asm/seccomp.h
index 1d8a2e2..684fb3a 100644
--- a/arch/mips/include/asm/seccomp.h
+++ b/arch/mips/include/asm/seccomp.h
@@ -2,27 +2,32 @@
 
 #include <linux/unistd.h>
 
-/*
- * Kludge alert:
- *
- * The generic seccomp code currently allows only a single compat ABI.	Until
- * this is fixed we priorize O32 as the compat ABI over N32.
- */
-#ifdef CONFIG_MIPS32_O32
+#ifdef CONFIG_COMPAT
+static inline const int *get_compat_mode1_syscalls(void)
+{
+	static const int syscalls_O32[] = {
+		__NR_O32_Linux + 3, __NR_O32_Linux + 4,
+		__NR_O32_Linux + 1, __NR_O32_Linux + 193,
+		0, /* null terminated */
+	};
+	static const int syscalls_N32[] = {
+		__NR_N32_Linux + 0, __NR_N32_Linux + 1,
+		__NR_N32_Linux + 58, __NR_N32_Linux + 211,
+		0, /* null terminated */
+	};
 
-#define __NR_seccomp_read_32		4003
-#define __NR_seccomp_write_32		4004
-#define __NR_seccomp_exit_32		4001
-#define __NR_seccomp_sigreturn_32	4193	/* rt_sigreturn */
+	if (config_enabled(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS))
+		return syscalls_O32;
 
-#elif defined(CONFIG_MIPS32_N32)
+	if (config_enabled(CONFIG_MIPS32_N32))
+		return syscalls_N32;
 
-#define __NR_seccomp_read_32		6000
-#define __NR_seccomp_write_32		6001
-#define __NR_seccomp_exit_32		6058
-#define __NR_seccomp_sigreturn_32	6211	/* rt_sigreturn */
+	BUG();
+}
 
-#endif /* CONFIG_MIPS32_O32 */
+#define get_compat_mode1_syscalls get_compat_mode1_syscalls
+
+#endif /* CONFIG_COMPAT */
 
 #include <asm-generic/seccomp.h>
 
diff --git a/arch/mips/include/asm/sibyte/bcm1480_regs.h b/arch/mips/include/asm/sibyte/bcm1480_regs.h
index ec0dacf..32a8483 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_regs.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_regs.h
@@ -415,8 +415,8 @@
 					(cpu)*BCM1480_IMR_ALIAS_MAILBOX_SPACING)
 #define A_BCM1480_IMR_ALIAS_MAILBOX_REGISTER(cpu, reg) (A_BCM1480_IMR_ALIAS_MAILBOX(cpu)+(reg))
 
-#define R_BCM1480_IMR_ALIAS_MAILBOX_0		0x0000		/* 0x0x0 */
-#define R_BCM1480_IMR_ALIAS_MAILBOX_0_SET	0x0008		/* 0x0x8 */
+#define R_BCM1480_IMR_ALIAS_MAILBOX_0		0x0000
+#define R_BCM1480_IMR_ALIAS_MAILBOX_0_SET	0x0008
 
 /*
  * these macros work together to build the address of a mailbox
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index 003e273..2292373 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -11,11 +11,17 @@
 
 #include <uapi/asm/signal.h>
 
+#ifdef CONFIG_MIPS32_COMPAT
+extern struct mips_abi mips_abi_32;
 
-#ifdef CONFIG_TRAD_SIGNALS
-#define sig_uses_siginfo(ka)	((ka)->sa.sa_flags & SA_SIGINFO)
+/* ABIs other than mips_abi_32 always use siginfo; it does so only with SA_SIGINFO. */
+#define sig_uses_siginfo(ka, abi)					\
+	(((abi) != &mips_abi_32) ? 1 : ((ka)->sa.sa_flags & SA_SIGINFO))
 #else
-#define sig_uses_siginfo(ka)	(1)
+#define sig_uses_siginfo(ka, abi)					\
+	((config_enabled(CONFIG_64BIT) ||				\
+	  !config_enabled(CONFIG_TRAD_SIGNALS)) ? 1 :			\
+		((ka)->sa.sa_flags & SA_SIGINFO))
 #endif
 
 #include <asm/sigcontext.h>
diff --git a/arch/mips/include/asm/smp-cps.h b/arch/mips/include/asm/smp-cps.h
index 326c16e..2ae1f61 100644
--- a/arch/mips/include/asm/smp-cps.h
+++ b/arch/mips/include/asm/smp-cps.h
@@ -29,7 +29,7 @@
 extern void mips_cps_core_entry(void);
 extern void mips_cps_core_init(void);
 
-extern struct vpe_boot_config *mips_cps_boot_vpes(void);
+extern void mips_cps_boot_vpes(struct core_boot_config *cfg, unsigned vpe);
 
 extern void mips_cps_pm_save(void);
 extern void mips_cps_pm_restore(void);
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 28b5d84a..ebb5c0f 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -105,7 +105,7 @@
 	__clear_software_ll_bit();					\
 	if (cpu_has_userlocal)						\
 		write_c0_userlocal(task_thread_info(next)->tp_value);	\
-	__restore_watch();						\
+	__restore_watch(next);						\
 	(last) = resume(prev, next, task_thread_info(next));		\
 } while (0)
 
diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index fc1cdd2..b6ecfee 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -171,7 +171,8 @@
 Ip_u3u1u2(_xor);
 Ip_u2u1u3(_xori);
 Ip_u2u1(_yield);
-
+Ip_u1u2(_ldpte);
+Ip_u2u1u3(_lddir);
 
 /* Handle labels. */
 struct uasm_label {
diff --git a/arch/mips/include/asm/watch.h b/arch/mips/include/asm/watch.h
index 20126ec..6ffe3ea 100644
--- a/arch/mips/include/asm/watch.h
+++ b/arch/mips/include/asm/watch.h
@@ -12,21 +12,21 @@
 
 #include <asm/mipsregs.h>
 
-void mips_install_watch_registers(void);
+void mips_install_watch_registers(struct task_struct *t);
 void mips_read_watch_registers(void);
 void mips_clear_watch_registers(void);
 void mips_probe_watch_registers(struct cpuinfo_mips *c);
 
 #ifdef CONFIG_HARDWARE_WATCHPOINTS
-#define __restore_watch() do {						\
+#define __restore_watch(task) do {					\
 	if (unlikely(test_bit(TIF_LOAD_WATCH,				\
-			      &current_thread_info()->flags))) {	\
-		mips_install_watch_registers();				\
+			      &task_thread_info(task)->flags))) {	\
+		mips_install_watch_registers(task);			\
 	}								\
 } while (0)
 
 #else
-#define __restore_watch() do {} while (0)
+#define __restore_watch(task) do {} while (0)
 #endif
 
 #endif /* _ASM_WATCH_H */
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index ddea53e..8051f9a 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -167,6 +167,7 @@
 	fceill_op    =	0x0a, ffloorl_op   =  0x0b,
 	fround_op    =	0x0c, ftrunc_op	   =  0x0d,
 	fceil_op     =	0x0e, ffloor_op	   =  0x0f,
+	fsel_op      =  0x10,
 	fmovc_op     =	0x11, fmovz_op	   =  0x12,
 	fmovn_op     =	0x13, fseleqz_op   =  0x14,
 	frecip_op    =  0x15, frsqrt_op    =  0x16,
@@ -204,6 +205,16 @@
 };
 
 /*
+ * func field for page table walker (Loongson-3).
+ */
+enum ptw_func {
+	lwdir_op = 0x00,
+	lwpte_op = 0x01,
+	lddir_op = 0x02,
+	ldpte_op = 0x03,
+};
+
+/*
  * func field for special3 lx opcodes (Cavium Octeon).
  */
 enum lx_func {
diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h
index cc49dc2..8069cf7 100644
--- a/arch/mips/include/uapi/asm/siginfo.h
+++ b/arch/mips/include/uapi/asm/siginfo.h
@@ -28,7 +28,7 @@
 
 #define __ARCH_SIGSYS
 
-#include <uapi/asm-generic/siginfo.h>
+#include <asm-generic/siginfo.h>
 
 /* We can't use generic siginfo_t, because our si_code and si_errno are swapped */
 typedef struct siginfo {
@@ -42,13 +42,13 @@
 
 		/* kill() */
 		struct {
-			pid_t _pid;		/* sender's pid */
+			__kernel_pid_t _pid;	/* sender's pid */
 			__ARCH_SI_UID_T _uid;	/* sender's uid */
 		} _kill;
 
 		/* POSIX.1b timers */
 		struct {
-			timer_t _tid;		/* timer id */
+			__kernel_timer_t _tid;	/* timer id */
 			int _overrun;		/* overrun count */
 			char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)];
 			sigval_t _sigval;	/* same as below */
@@ -57,26 +57,26 @@
 
 		/* POSIX.1b signals */
 		struct {
-			pid_t _pid;		/* sender's pid */
+			__kernel_pid_t _pid;	/* sender's pid */
 			__ARCH_SI_UID_T _uid;	/* sender's uid */
 			sigval_t _sigval;
 		} _rt;
 
 		/* SIGCHLD */
 		struct {
-			pid_t _pid;		/* which child */
+			__kernel_pid_t _pid;	/* which child */
 			__ARCH_SI_UID_T _uid;	/* sender's uid */
 			int _status;		/* exit code */
-			clock_t _utime;
-			clock_t _stime;
+			__kernel_clock_t _utime;
+			__kernel_clock_t _stime;
 		} _sigchld;
 
 		/* IRIX SIGCHLD */
 		struct {
-			pid_t _pid;		/* which child */
-			clock_t _utime;
+			__kernel_pid_t _pid;	/* which child */
+			__kernel_clock_t _utime;
 			int _status;		/* exit code */
-			clock_t _stime;
+			__kernel_clock_t _stime;
 		} _irix_sigchld;
 
 		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
@@ -123,6 +123,4 @@
 #define SI_TIMER __SI_CODE(__SI_TIMER, -3) /* sent by timer expiration */
 #define SI_MESGQ __SI_CODE(__SI_MESGQ, -4) /* sent by real time mesq state change */
 
-#include <asm-generic/siginfo.h>
-
 #endif /* _UAPI_ASM_SIGINFO_H */
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 934b15b..4e3f9b7a 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -39,8 +39,6 @@
 
 #include "clock.h"
 
-static bool is_avt2;
-
 /* GPIOs */
 #define QI_LB60_GPIO_SD_CD		JZ_GPIO_PORTD(0)
 #define QI_LB60_GPIO_SD_VCC_EN_N	JZ_GPIO_PORTD(2)
@@ -367,43 +365,12 @@
 	.power_active_low	= 1,
 };
 
-/* OHCI */
-static struct regulator_consumer_supply avt2_usb_regulator_consumer =
-	REGULATOR_SUPPLY("vbus", "jz4740-ohci");
-
-static struct regulator_init_data avt2_usb_regulator_init_data = {
-	.num_consumer_supplies = 1,
-	.consumer_supplies = &avt2_usb_regulator_consumer,
-	.constraints = {
-		.name = "USB power",
-		.min_uV = 5000000,
-		.max_uV = 5000000,
-		.valid_modes_mask = REGULATOR_MODE_NORMAL,
-		.valid_ops_mask = REGULATOR_CHANGE_STATUS,
-	},
-};
-
-static struct fixed_voltage_config avt2_usb_regulator_data = {
-	.supply_name = "USB power",
-	.microvolts = 5000000,
-	.gpio = JZ_GPIO_PORTB(17),
-	.init_data = &avt2_usb_regulator_init_data,
-};
-
-static struct platform_device avt2_usb_regulator_device = {
-	.name = "reg-fixed-voltage",
-	.id = -1,
-	.dev = {
-		.platform_data = &avt2_usb_regulator_data,
-	}
-};
-
+/* beeper */
 static struct pwm_lookup qi_lb60_pwm_lookup[] = {
 	PWM_LOOKUP("jz4740-pwm", 4, "pwm-beeper", NULL, 0,
 		   PWM_POLARITY_NORMAL),
 };
 
-/* beeper */
 static struct platform_device qi_lb60_pwm_beeper = {
 	.name = "pwm-beeper",
 	.id = -1,
@@ -487,11 +454,6 @@
 	spi_register_board_info(qi_lb60_spi_board_info,
 				ARRAY_SIZE(qi_lb60_spi_board_info));
 
-	if (is_avt2) {
-		platform_device_register(&avt2_usb_regulator_device);
-		platform_device_register(&jz4740_usb_ohci_device);
-	}
-
 	pwm_add_table(qi_lb60_pwm_lookup, ARRAY_SIZE(qi_lb60_pwm_lookup));
 
 	return platform_add_devices(jz_platform_devices,
@@ -499,19 +461,9 @@
 
 }
 
-static __init int board_avt2(char *str)
-{
-	qi_lb60_mmc_pdata.card_detect_active_low = 1;
-	is_avt2 = true;
-
-	return 1;
-}
-__setup("avt2", board_avt2);
-
 static int __init qi_lb60_board_setup(void)
 {
-	printk(KERN_INFO "Qi Hardware JZ4740 QI %s setup\n",
-		is_avt2 ? "AVT2" : "LB60");
+	printk(KERN_INFO "Qi Hardware JZ4740 QI LB60 setup\n");
 
 	board_gpio_setup();
 
diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c
index e8a463b..2f1dab3 100644
--- a/arch/mips/jz4740/platform.c
+++ b/arch/mips/jz4740/platform.c
@@ -32,31 +32,6 @@
 
 #include "clock.h"
 
-/* OHCI controller */
-static struct resource jz4740_usb_ohci_resources[] = {
-	{
-		.start	= JZ4740_UHC_BASE_ADDR,
-		.end	= JZ4740_UHC_BASE_ADDR + 0x1000 - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	{
-		.start	= JZ4740_IRQ_UHC,
-		.end	= JZ4740_IRQ_UHC,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-struct platform_device jz4740_usb_ohci_device = {
-	.name		= "jz4740-ohci",
-	.id		= -1,
-	.dev = {
-		.dma_mask = &jz4740_usb_ohci_device.dev.coherent_dma_mask,
-		.coherent_dma_mask = DMA_BIT_MASK(32),
-	},
-	.num_resources	= ARRAY_SIZE(jz4740_usb_ohci_resources),
-	.resource	= jz4740_usb_ohci_resources,
-};
-
 /* USB Device Controller */
 struct platform_device jz4740_udc_xceiv_device = {
 	.name = "usb_phy_generic",
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index b0988fd..e6053d0 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -44,7 +44,7 @@
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP_UP)		+= smp-up.o
-obj-$(CONFIG_CPU_BMIPS)		+= smp-bmips.o bmips_vec.o
+obj-$(CONFIG_CPU_BMIPS)		+= smp-bmips.o bmips_vec.o bmips_5xxx_init.o
 
 obj-$(CONFIG_MIPS_MT)		+= mips-mt.o
 obj-$(CONFIG_MIPS_MT_FPAFF)	+= mips-mt-fpaff.o
@@ -83,6 +83,8 @@
 
 obj-$(CONFIG_GPIO_TXX9)		+= gpio_txx9.o
 
+obj-$(CONFIG_RELOCATABLE)	+= relocate.o
+
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 154e203..1ea973b 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/kbuild.h>
 #include <linux/suspend.h>
+#include <asm/cpu-info.h>
 #include <asm/pm.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
@@ -338,6 +339,15 @@
 }
 #endif
 
+void output_cpuinfo_defines(void)
+{
+	COMMENT(" MIPS cpuinfo offsets. ");
+	DEFINE(CPUINFO_SIZE, sizeof(struct cpuinfo_mips));
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+	OFFSET(CPUINFO_ASID_MASK, cpuinfo_mips, asid_mask);
+#endif
+}
+
 void output_kvm_defines(void)
 {
 	COMMENT(" KVM/MIPS Specfic offsets. ");
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 1b992c6..58ad63d 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -30,21 +30,7 @@
 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(hdr)						\
-({									\
-	int __res = 1;							\
-	struct elfhdr *__h = (hdr);					\
-									\
-	if (!mips_elf_check_machine(__h))				\
-		__res = 0;						\
-	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
-		__res = 0;						\
-	if (((__h->e_flags & EF_MIPS_ABI2) == 0) ||			\
-	    ((__h->e_flags & EF_MIPS_ABI) != 0))			\
-		__res = 0;						\
-									\
-	__res;								\
-})
+#define elf_check_arch elfn32_check_arch
 
 #define TASK32_SIZE		0x7fff8000UL
 #undef ELF_ET_DYN_BASE
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index abd3aff..49fb881 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -28,39 +28,9 @@
 typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 
 /*
- * In order to be sure that we don't attempt to execute an O32 binary which
- * requires 64 bit FP (FR=1) on a system which does not support it we refuse
- * to execute any binary which has bits specified by the following macro set
- * in its ELF header flags.
- */
-#ifdef CONFIG_MIPS_O32_FP64_SUPPORT
-# define __MIPS_O32_FP64_MUST_BE_ZERO	0
-#else
-# define __MIPS_O32_FP64_MUST_BE_ZERO	EF_MIPS_FP64
-#endif
-
-/*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(hdr)						\
-({									\
-	int __res = 1;							\
-	struct elfhdr *__h = (hdr);					\
-									\
-	if (!mips_elf_check_machine(__h))				\
-		__res = 0;						\
-	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
-		__res = 0;						\
-	if ((__h->e_flags & EF_MIPS_ABI2) != 0)				\
-		__res = 0;						\
-	if (((__h->e_flags & EF_MIPS_ABI) != 0) &&			\
-	    ((__h->e_flags & EF_MIPS_ABI) != EF_MIPS_ABI_O32))		\
-		__res = 0;						\
-	if (__h->e_flags & __MIPS_O32_FP64_MUST_BE_ZERO)		\
-		__res = 0;						\
-									\
-	__res;								\
-})
+#define elf_check_arch elfo32_check_arch
 
 #ifdef CONFIG_KVM_GUEST
 #define TASK32_SIZE		0x3fff8000UL
diff --git a/arch/mips/kernel/bmips_5xxx_init.S b/arch/mips/kernel/bmips_5xxx_init.S
new file mode 100644
index 0000000..adaa82e
--- /dev/null
+++ b/arch/mips/kernel/bmips_5xxx_init.S
@@ -0,0 +1,753 @@
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011-2012 by Broadcom Corporation
+ *
+ * Init for bmips 5000.
+ * Used to init second core in dual core 5000's.
+ */
+
+#include <linux/init.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cacheops.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/stackframe.h>
+#include <asm/addrspace.h>
+#include <asm/hazards.h>
+#include <asm/bmips.h>
+
+#ifdef CONFIG_CPU_BMIPS5000
+
+/* Apply cache op 'op' to each line in [kva, kva + size); clobbers t0-t2. */
+#define cacheop(kva, size, linesize, op) 	\
+	.set noreorder			;	\
+	addu		t1, kva, size	;	\
+	subu		t2, linesize, 1	;	\
+	not		t2		;	\
+	and		t0, kva, t2	;	\
+	addiu		t1, t1, -1	;	\
+	and		t1, t2		;	\
+9:	cache		op, 0(t0)	;	\
+	bne		t0, t1, 9b	;	\
+	 addu		t0, linesize	;	\
+	.set reorder			;
+
+
+
+#define	IS_SHIFT	22
+#define	IL_SHIFT	19
+#define	IA_SHIFT	16
+#define	DS_SHIFT	13
+#define	DL_SHIFT	10
+#define	DA_SHIFT	 7
+#define	IS_MASK		 7
+#define	IL_MASK		 7
+#define	IA_MASK		 7
+#define	DS_MASK		 7
+#define	DL_MASK		 7
+#define	DA_MASK		 7
+#define	ICE_MASK	0x80000000
+#define	DCE_MASK	0x40000000
+
+#define CP0_BRCM_CONFIG0	$22, 0
+#define CP0_BRCM_MODE		$22, 1
+#define	CP0_CONFIG_K0_MASK	7
+
+#define CP0_ICACHE_TAG_LO	$28
+#define CP0_ICACHE_DATA_LO	$28, 1
+#define CP0_DCACHE_TAG_LO	$28, 2
+#define CP0_D_SEC_CACHE_DATA_LO	$28, 3
+#define CP0_ICACHE_TAG_HI	$29
+#define CP0_ICACHE_DATA_HI	$29, 1
+#define CP0_DCACHE_TAG_HI	$29, 2
+
+#define CP0_BRCM_MODE_Luc_MASK		(1 << 11)
+#define	CP0_BRCM_CONFIG0_CWF_MASK	(1 << 20)
+#define	CP0_BRCM_CONFIG0_TSE_MASK	(1 << 19)
+#define CP0_BRCM_MODE_SET_MASK		(1 << 7)
+#define CP0_BRCM_MODE_ClkRATIO_MASK	(7 << 4)
+#define CP0_BRCM_MODE_BrPRED_MASK 	(3 << 24)
+#define CP0_BRCM_MODE_BrPRED_SHIFT	24
+#define CP0_BRCM_MODE_BrHIST_MASK 	(0x1f << 20)
+#define CP0_BRCM_MODE_BrHIST_SHIFT	20
+
+/* ZSC L2 Cache Register Access Register Definitions */
+#define BRCM_ZSC_ALL_REGS_SELECT		0x7 << 24
+
+#define BRCM_ZSC_CONFIG_REG			0 << 3
+#define BRCM_ZSC_REQ_BUFFER_REG			2 << 3
+#define BRCM_ZSC_RBUS_ADDR_MAPPING_REG0		4 << 3
+#define BRCM_ZSC_RBUS_ADDR_MAPPING_REG1		6 << 3
+#define BRCM_ZSC_RBUS_ADDR_MAPPING_REG2		8 << 3
+
+#define BRCM_ZSC_SCB0_ADDR_MAPPING_REG0		0xa << 3
+#define BRCM_ZSC_SCB0_ADDR_MAPPING_REG1		0xc << 3
+
+#define BRCM_ZSC_SCB1_ADDR_MAPPING_REG0		0xe << 3
+#define BRCM_ZSC_SCB1_ADDR_MAPPING_REG1		0x10 << 3
+
+#define BRCM_ZSC_CONFIG_LMB1En			1 << (15)
+#define BRCM_ZSC_CONFIG_LMB0En			1 << (14)
+
+/* branch prediction values */
+
+#define BRCM_BrPRED_ALL_TAKEN		(0x0)
+#define BRCM_BrPRED_ALL_NOT_TAKEN	(0x1)
+#define BRCM_BrPRED_BHT_ENABLE		(0x2)
+#define BRCM_BrPRED_PREDICT_BACKWARD	(0x3)
+
+
+
+.align 2
+/*
+ * Function: 	size_i_cache
+ * Arguments: 	None
+ * Returns:	v0 = I-cache size, v1 = I-cache line size (both 0 if none)
+ * Description: compute the I-cache size and I-cache line size
+ * Trashes:	v0, v1, a0, t0
+ *
+ *	pseudo code:
+ *	  v1 = 1 << (IL + 1); v0 = (64 << IS) * v1 * (IA + 1)
+ */
+
+LEAF(size_i_cache)
+	.set	noreorder
+
+	mfc0	a0, CP0_CONFIG, 1
+	move	t0, a0
+
+	/*
+	 * Determine sets per way: IS
+	 *
+	 * This field contains the number of sets (i.e., indices) per way of
+	 * the instruction cache:
+	 * i) 0x0: 64, ii) 0x1: 128, iii) 0x2: 256, iv) 0x3: 512, v) 0x4: 1k
+	 * vi) 0x5 - 0x7: Reserved.
+	 */
+
+	srl	a0, a0, IS_SHIFT
+	and	a0, a0, IS_MASK
+
+	/* sets per way = (64<<IS) */
+
+	li	v0, 0x40
+	sllv	v0, v0, a0
+
+	/*
+	 * Determine line size: IL
+	 *
+	 * This field contains the line size of the instruction cache:
+	 * i) 0x0: No I-cache present, ii) 0x3: 16 bytes, iii) 0x4: 32 bytes,
+	 * iv) 0x5: 64 bytes, v) the rest: Reserved.
+	 */
+
+	move	a0, t0
+
+	srl	a0, a0, IL_SHIFT
+	and	a0, a0, IL_MASK
+
+	beqz	a0, no_i_cache
+	nop
+
+	/* line size = 2 ^ (IL+1) */
+
+	addi	a0, a0, 1
+	li	v1, 1
+	sll	v1, v1, a0
+
+	/* v0 now has sets per way; multiply it by the line size,
+	 * which gives the size of one way
+	 */
+
+	sll	v0, v0, a0
+
+	/*
+	 * Determine set associativity: IA
+	 *
+	 * This field contains the set associativity of the instruction cache.
+	 * i) 0x0: Direct mapped, ii) 0x1: 2-way, iii) 0x2: 3-way, iv) 0x3:
+	 * 4-way, v) 0x4 - 0x7: Reserved.
+	 */
+
+	move	a0, t0
+
+	srl	a0, a0, IA_SHIFT
+	and	a0, a0, IA_MASK
+	addi	a0, a0, 0x1
+
+	/* v0 has the size of one way; multiply it by the
+	 * set associativity (IA + 1) to get the cache size
+	 */
+
+	multu	v0, a0	/* multu is interlocked, so no need to insert nops */
+	mflo	v0
+	b	1f
+	nop
+
+no_i_cache:
+	move	v0, zero
+	move	v1, zero
+1:
+	jr	ra
+	nop
+	.set	reorder
+
+END(size_i_cache)
+
+/*
+ * Function: 	size_d_cache
+ * Arguments: 	None
+ * Returns:	v0 = D-cache size, v1 = D-cache line size (both 0 if none)
+ * Description: compute the D-cache size and D-cache line size.
+ * Trashes:	v0, v1, a0, t0
+ *		v1 = 1 << (DL + 1); v0 = (64 << DS) * v1 * (DA + 1)
+ */
+
+LEAF(size_d_cache)
+	.set	noreorder
+
+	mfc0	a0, CP0_CONFIG, 1
+	move	t0, a0
+
+	/*
+	 * Determine sets per way: DS
+	 *
+	 * This field contains the number of sets (i.e., indices) per way of
+	 * the data cache:
+	 * i) 0x0: 64, ii) 0x1: 128, iii) 0x2: 256, iv) 0x3: 512, v) 0x4: 1k
+	 * vi) 0x5 - 0x7: Reserved.
+	 */
+
+	srl	a0, a0, DS_SHIFT
+	and	a0, a0, DS_MASK
+
+	/* sets per way = (64<<DS) */
+
+	li	v0, 0x40
+	sllv	v0, v0, a0
+
+	/*
+	 * Determine line size: DL
+	 *
+	 * This field contains the line size of the data cache:
+	 * i) 0x0: No D-cache present, ii) 0x3: 16 bytes, iii) 0x4: 32 bytes,
+	 * iv) 0x5: 64 bytes, v) the rest: Reserved.
+	 */
+	move	a0, t0
+
+	srl	a0, a0, DL_SHIFT
+	and	a0, a0, DL_MASK
+
+	beqz	a0, no_d_cache
+	nop
+
+	/* line size = 2 ^ (DL+1) */
+
+	addi	a0, a0, 1
+	li	v1, 1
+	sll	v1, v1, a0
+
+	/* v0 now has sets per way; multiply it by the line size,
+	 * which gives the size of one way
+	 */
+
+	sll	v0, v0, a0
+
+	/* determine set associativity: DA
+	 *
+	 * This field contains the set associativity of the data cache.
+	 * i) 0x0: Direct mapped, ii) 0x1: 2-way, iii) 0x2: 3-way, iv) 0x3:
+	 * 4-way, v) 0x4 - 0x7: Reserved.
+	 */
+
+	move	a0, t0
+
+	srl	a0, a0, DA_SHIFT
+	and	a0, a0, DA_MASK
+	addi	a0, a0, 0x1
+
+	/* v0 has the size of one way; multiply it by the
+	 * set associativity (DA + 1) to get the cache size
+	 */
+
+	multu	v0, a0	/* multu is interlocked, so no need to insert nops */
+	mflo	v0
+
+	b	1f
+	nop
+
+no_d_cache:
+	move	v0, zero
+	move	v1, zero
+1:
+	jr	ra
+	nop
+	.set	reorder
+
+END(size_d_cache)
+
+
+/*
+ * Function: enable_ID
+ * Arguments: 	None
+ * Returns:	None
+ * Description: Enable the I- and D-caches by setting the ICE and DCE bits
+ *		in CP0_BRCM_CONFIG0.
+ * Trashes:	t0
+ *
+ */
+	.global	enable_ID
+	.ent	enable_ID
+	.set	noreorder
+enable_ID:
+	mfc0	t0, CP0_BRCM_CONFIG0
+	or	t0, t0, (ICE_MASK | DCE_MASK)
+	mtc0	t0, CP0_BRCM_CONFIG0
+	jr	ra
+	nop
+
+	.end	enable_ID
+	.set	reorder
+
+
+/*
+ * Function: l1_init
+ * Arguments: 	None
+ * Returns:	None
+ * Description: Enable the I and D caches, then initialize their tags
+ * Trashes:	a0, v0, v1, t0, t1, t2, t8, k0, k1
+ *
+ */
+	.globl	l1_init
+	.ent	l1_init
+	.set	noreorder
+l1_init:
+
+	/* save return address (ra is overwritten by the jal calls below) */
+	move	t8, ra
+
+
+	/* initialize I and D cache Data and Tag registers.  */
+	mtc0	zero, CP0_ICACHE_TAG_LO
+	mtc0	zero, CP0_ICACHE_TAG_HI
+	mtc0	zero, CP0_ICACHE_DATA_LO
+	mtc0	zero, CP0_ICACHE_DATA_HI
+	mtc0	zero, CP0_DCACHE_TAG_LO
+	mtc0	zero, CP0_DCACHE_TAG_HI
+
+	/* Enable Caches before Clearing. If the caches are disabled
+	 * then the cache operations to clear the cache will be ignored
+	 */
+
+	jal	enable_ID
+	nop
+
+	jal	size_i_cache	/* v0 = i-cache size, v1 = i-cache line size */
+	nop
+
+	/* run uncached in kseg 1 (set bit 29 of the address of 1f) */
+	la	k0, 1f
+	lui	k1, 0x2000
+	or	k0, k1, k0
+	jr	k0
+	nop
+1:
+
+	/*
+	 * set K0 cache mode
+	 */
+
+	mfc0	t0, CP0_CONFIG
+	and	t0, t0, ~CP0_CONFIG_K0_MASK
+	or	t0, t0, 3	/* Write Back mode */
+	mtc0	t0, CP0_CONFIG
+
+	/*
+	 * Initialize instruction cache.
+	 */
+
+	li	a0, KSEG0
+	cacheop(a0, v0, v1, Index_Store_Tag_I)
+
+	/*
+	 * Now we can run from I-$, kseg 0 (or + xor clears bit 29 of 1f)
+	 */
+	la	k0, 1f
+	lui	k1, 0x2000
+	or	k0, k1, k0
+	xor	k0, k1, k0
+	jr	k0
+	nop
+1:
+	/*
+	 * Initialize data cache.
+	 */
+
+	jal	size_d_cache	/* v0 = d-cache size, v1 = d-cache line size */
+	nop
+
+
+	li	a0, KSEG0
+	cacheop(a0, v0, v1, Index_Store_Tag_D)
+
+	jr	t8
+	nop
+
+	.end 	l1_init
+	.set	reorder
+
+
+/*
+ * Function: 	set_other_config
+ * Arguments:	none
+ * Returns:	None
+ * Description: Set bit 2 (0x4) in CP0_CACHEERR selects 0, 1 and 2.
+ * Trashes:	t0, t1
+ *
+ *	pseudo code:
+ *	  CACHEERR[sel] |= 0x4 for sel = 0 (I-fetch), 1 (load), 2 (store)
+ */
+LEAF(set_other_config)
+	.set noreorder
+
+	/* enable Bus error for I-fetch */
+	mfc0	t0, CP0_CACHEERR, 0
+	li	t1, 0x4
+	or	t0, t1
+	mtc0	t0, CP0_CACHEERR, 0
+
+	/* enable Bus error for Load */
+	mfc0	t0, CP0_CACHEERR, 1
+	li	t1, 0x4
+	or	t0, t1
+	mtc0	t0, CP0_CACHEERR, 1
+
+	/* enable Bus Error for Store */
+	mfc0	t0, CP0_CACHEERR, 2
+	li	t1, 0x4
+	or	t0, t1
+	mtc0	t0, CP0_CACHEERR, 2
+
+	jr	ra
+	nop
+	.set reorder
+END(set_other_config)
+
+/*
+ * Function: 	set_branch_pred
+ * Arguments:	none
+ * Returns:	None
+ * Description: Select BHT branch prediction with a history count of 8
+ * Trashes:	t0, t1
+ *
+ *	pseudo code:
+ *	  mode = (mode & ~(BrPRED | BrHIST)) | (BHT_ENABLE << 24) | (8 << 20)
+ */
+
+LEAF(set_branch_pred)
+	.set noreorder
+	mfc0	t0, CP0_BRCM_MODE
+	li	t1, ~(CP0_BRCM_MODE_BrPRED_MASK | CP0_BRCM_MODE_BrHIST_MASK )
+	and	t0, t0, t1
+
+	/* enable Branch prediction */
+	li	t1, BRCM_BrPRED_BHT_ENABLE
+	sll	t1, CP0_BRCM_MODE_BrPRED_SHIFT
+	or	t0, t0, t1
+
+	/* set history count to 8 */
+	li	t1, 8
+	sll	t1, CP0_BRCM_MODE_BrHIST_SHIFT
+	or	t0, t0, t1
+
+	mtc0	t0, CP0_BRCM_MODE
+	jr	ra
+	nop
+	.set	reorder
+END(set_branch_pred)
+
+
+/*
+ * Function: 	set_luc
+ * Arguments:	None
+ * Returns:	None
+ * Description: Set the Luc ("link uncached") bit in CP0_BRCM_MODE
+ * Trashes:	t0, t1
+ *
+ */
+LEAF(set_luc)
+	.set noreorder
+	mfc0	t0, CP0_BRCM_MODE
+	li	t1, ~(CP0_BRCM_MODE_Luc_MASK)
+	and	t0, t0, t1
+
+	/* set Luc */
+	ori	t0, t0, CP0_BRCM_MODE_Luc_MASK
+
+	mtc0	t0, CP0_BRCM_MODE
+	jr	ra
+	nop
+	.set	reorder
+END(set_luc)
+
+/*
+ * Function: 	set_cwf_tse
+ * Arguments:	None
+ * Returns:	None
+ * Description: Set the CWF and TSE bits in CP0_BRCM_CONFIG0
+ * Trashes:	t0, t1
+ *
+ */
+LEAF(set_cwf_tse)
+	.set noreorder
+	mfc0	t0, CP0_BRCM_CONFIG0
+	li	t1, (CP0_BRCM_CONFIG0_CWF_MASK | CP0_BRCM_CONFIG0_TSE_MASK)
+	or	t0, t0, t1
+
+	mtc0	t0, CP0_BRCM_CONFIG0
+	jr	ra
+	nop
+	.set	reorder
+END(set_cwf_tse)
+
+/*
+ * Function: 	set_clock_ratio
+ * Arguments:	a0 = clock ratio bits, OR'ed into CP0_BRCM_MODE unshifted
+ * Returns:	None
+ * Description: Clear the SET and ClkRATIO fields, then set SET and OR in a0
+ * Trashes:	t0, t1
+ *
+ *	pseudo code:
+ *	  mode = (mode & ~(SET | ClkRATIO)) | SET | a0
+ */
+LEAF(set_clock_ratio)
+	.set noreorder
+
+	mfc0	t0, CP0_BRCM_MODE
+	li	t1, ~(CP0_BRCM_MODE_SET_MASK | CP0_BRCM_MODE_ClkRATIO_MASK)
+	and	t0, t0, t1
+	li	t1, CP0_BRCM_MODE_SET_MASK
+	or	t0, t0, t1
+	or	t0, t0, a0
+	mtc0	t0, CP0_BRCM_MODE
+	jr	ra
+	nop
+	.set	reorder
+END(set_clock_ratio)
+/*
+ * Function: set_zephyr
+ * Arguments:	None
+ * Returns:	None
+ * Description: OR 0x09008000 ("turn off pref, jtb") into CP0 $22 sel 8
+ * Trashes:	t0 & t1
+ *
+ */
+LEAF(set_zephyr)
+	.set	noreorder
+
+	/* enable read/write of CP0 #22 sel. 8 */
+	li	t0, 0x5a455048
+	.word	0x4088b00f	/* mtc0    t0, $22, 15 */
+
+	.word	0x4008b008	/* mfc0    t0, $22, 8 */
+	li	t1, 0x09008000	/* turn off pref, jtb */
+	or	t0, t0, t1
+	.word	0x4088b008	/* mtc0    t0, $22, 8 */
+	sync
+
+	/* disable read/write of CP0 #22 sel 8 */
+	li	t0, 0x0
+	.word	0x4088b00f	/* mtc0    t0, $22, 15 */
+
+
+	jr	ra
+	nop
+	.set reorder
+
+END(set_zephyr)
+
+
+/*
+ * Function:	set_llmb
+ * Arguments:	a0 = 0 disables llmb, a0 != 0 enables llmb
+ * Returns:	None
+ * Description: Clear, or set, LMB0En and LMB1En in the ZSC config value
+ * Trashes:	t0, t1, t2
+ *
+ *      pseudo code:
+ *	  cfg &= ~(LMB1En | LMB0En); if (a0) cfg |= LMB1En | LMB0En
+ */
+LEAF(set_llmb)
+	.set noreorder
+
+	li	t2, 0x90000000 | BRCM_ZSC_ALL_REGS_SELECT | BRCM_ZSC_CONFIG_REG
+	sync
+	cache	0x7, 0x0(t2)
+	sync
+	mfc0	t0, CP0_D_SEC_CACHE_DATA_LO
+	li	t1, ~(BRCM_ZSC_CONFIG_LMB1En | BRCM_ZSC_CONFIG_LMB0En)
+	and	t0, t0, t1
+
+	beqz	a0, svlmb
+	nop
+
+enable_lmb:
+	li	t1, (BRCM_ZSC_CONFIG_LMB1En | BRCM_ZSC_CONFIG_LMB0En)
+	or	t0, t0, t1
+
+svlmb:
+	mtc0	t0, CP0_D_SEC_CACHE_DATA_LO
+	sync
+	cache	0xb, 0x0(t2)
+	sync
+
+	jr	ra
+	nop
+	.set reorder
+
+END(set_llmb)
+/*
+ * Function: 	core_init
+ * Arguments:	none
+ * Returns:	None
+ * Description: initialize core related configuration
+ * Trashes:	a0, t8, and t0, t1, t2 via the helpers called below
+ *
+ *	pseudo code:
+ *
+ */
+	.globl	core_init
+	.ent	core_init
+	.set	noreorder
+core_init:
+	move	t8, ra
+
+	/* set Zephyr bits. */
+	bal	set_zephyr
+	nop
+
+#if ENABLE_FPU==1
+	/* initialize the Floating point unit (both TPs) */
+	bal	init_fpu
+	nop
+#endif
+
+	/* set low latency memory bus */
+	li	a0, 1
+	bal	set_llmb
+	nop
+
+	/* set branch prediction (TP0 only) */
+	bal	set_branch_pred
+	nop
+
+	/* set link uncached */
+	bal	set_luc
+	nop
+
+	/* set CWF and TSE */
+	bal	set_cwf_tse
+	nop
+
+	/*
+	 * set clock ratio by setting 1 to 'set'
+	 * and 0 to ClkRatio, (TP0 only)
+	 */
+	li	a0, 0
+	bal	set_clock_ratio
+	nop
+
+	/* set other configuration to defaults */
+	bal	set_other_config
+	nop
+
+	move	ra, t8
+	jr	ra
+	nop
+
+	.set reorder
+	.end	core_init
+
+/*
+ * Function: 	clear_jump_target_buffer
+ * Arguments:	None
+ * Returns:	None
+ * Description: Issue the "reset call/return stack" and then the "reset jump
+ *		target buffer" command for this thread via CP0 $22 sel 2
+ * Trashes:	t0, t1, t2
+ */
+#define RESET_CALL_RETURN_STACK_THIS_THREAD		(0x06<<16)
+#define RESET_JUMP_TARGET_BUFFER_THIS_THREAD		(0x04<<16)
+#define JTB_CS_CNTL_MASK				(0xFF<<16)
+
+	.globl	clear_jump_target_buffer
+	.ent	clear_jump_target_buffer
+	.set	noreorder
+clear_jump_target_buffer:
+
+	mfc0	t0, $22, 2
+	nop
+	nop
+
+	li	t1, ~JTB_CS_CNTL_MASK
+	and	t0, t0, t1
+	li	t2, RESET_CALL_RETURN_STACK_THIS_THREAD
+	or	t0, t0, t2
+	mtc0	t0, $22, 2
+	nop
+	nop
+
+	and	t0, t0, t1
+	li	t2, RESET_JUMP_TARGET_BUFFER_THIS_THREAD
+	or	t0, t0, t2
+	mtc0	t0, $22, 2
+	nop
+	nop
+	jr	ra
+	nop
+
+	.end	clear_jump_target_buffer
+	.set	reorder
+/*
+ * Function: 	bmips_5xxx_init
+ * Arguments: 	None
+ * Returns:	None
+ * Description: Run l1_init, core_init and clear_jump_target_buffer, then
+ *		clear CP0_CAUSE; a0 is saved in t5 and restored on return
+ * Trashes:	v0, v1, t0, t1, t2, t5, t7, t8, k0, k1
+ */
+	.globl	bmips_5xxx_init
+	.ent	bmips_5xxx_init
+	.set	noreorder
+bmips_5xxx_init:
+
+	/* save return address and A0 */
+	move	t7, ra
+	move	t5, a0
+
+	jal	l1_init
+	nop
+
+	jal	core_init
+	nop
+
+	jal	clear_jump_target_buffer
+	nop
+
+	mtc0	zero, CP0_CAUSE
+
+	move 	a0, t5
+	jr	t7
+	nop
+
+	.end 	bmips_5xxx_init
+	.set	reorder
+
+
+#endif
diff --git a/arch/mips/kernel/bmips_vec.S b/arch/mips/kernel/bmips_vec.S
index 8649507..921a5fa 100644
--- a/arch/mips/kernel/bmips_vec.S
+++ b/arch/mips/kernel/bmips_vec.S
@@ -88,12 +88,13 @@
 	li	k1, (1 << 19)
 	mfc0	k0, CP0_STATUS
 	and	k0, k1
-	beqz	k0, bmips_smp_entry
+	beqz	k0, soft_reset
 
 #if defined(CONFIG_CPU_BMIPS5000)
 	mfc0	k0, CP0_PRID
 	li	k1, PRID_IMP_BMIPS5000
-	andi	k0, 0xff00
+	/* mask with PRID_IMP_BMIPS5000 to cover both variants */
+	andi	k0, PRID_IMP_BMIPS5000
 	bne	k0, k1, 1f
 
 	/* if we're not on core 0, this must be the SMP boot signal */
@@ -125,13 +126,48 @@
 	.set	arch=r4000
 	eret
 
+#ifdef CONFIG_SMP
+soft_reset:
+
+#if defined(CONFIG_CPU_BMIPS5000)
+	mfc0	k0, CP0_PRID
+	andi	k0, 0xff00
+	li	k1, PRID_IMP_BMIPS5200
+	bne	k0, k1, bmips_smp_entry
+
+        /* if running on TP 1, jump  to  bmips_smp_entry */
+        mfc0    k0, $22
+        li      k1, (1 << 24)
+        and     k1, k0
+        bnez    k1, bmips_smp_entry
+        nop
+
+        /*
+         * running on TP0, can not be core 0 (the boot core).
+         * Check for soft reset.  Indicates a warm boot
+         */
+        mfc0    k0, $12
+        li      k1, (1 << 20)
+        and     k0, k1
+        beqz    k0, bmips_smp_entry
+
+        /*
+         * Warm boot.
+         * Cache init is only done on TP0
+         */
+        la      k0, bmips_5xxx_init
+        jalr    k0
+        nop
+
+        b       bmips_smp_entry
+        nop
+#endif
+
 /***********************************************************************
  * CPU1 reset vector (used for the initial boot only)
  * This is still part of bmips_reset_nmi_vec().
  ***********************************************************************/
 
-#ifdef CONFIG_SMP
-
 bmips_smp_entry:
 
 	/* set up CP0 STATUS; enable FPU */
@@ -166,10 +202,12 @@
 2:
 #endif /* CONFIG_CPU_BMIPS4350 || CONFIG_CPU_BMIPS4380 */
 #if defined(CONFIG_CPU_BMIPS5000)
-	/* set exception vector base */
+	/* mask with PRID_IMP_BMIPS5000 to cover both variants */
 	li	k1, PRID_IMP_BMIPS5000
+	andi	k0, PRID_IMP_BMIPS5000
 	bne	k0, k1, 3f
 
+	/* set exception vector base */
 	la	k0, ebase
 	lw	k0, 0(k0)
 	mtc0	k0, $15, 1
@@ -263,6 +301,8 @@
 #endif /* CONFIG_CPU_BMIPS4380 */
 #if defined(CONFIG_CPU_BMIPS5000)
 	li	t1, PRID_IMP_BMIPS5000
+	/* mask with PRID_IMP_BMIPS5000 to cover both variants */
+	andi	t2, PRID_IMP_BMIPS5000
 	bne	t2, t1, 2f
 
 	mfc0	t0, $22, 5
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index d8f9b35..ceca6cc 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -688,21 +688,9 @@
 			}
 			lose_fpu(1);    /* Save FPU state for the emulator. */
 			reg = insn.i_format.rt;
-			bit = 0;
-			switch (insn.i_format.rs) {
-			case bc1eqz_op:
-				/* Test bit 0 */
-				if (get_fpr32(&current->thread.fpu.fpr[reg], 0)
-				    & 0x1)
-					bit = 1;
-				break;
-			case bc1nez_op:
-				/* Test bit 0 */
-				if (!(get_fpr32(&current->thread.fpu.fpr[reg], 0)
-				      & 0x1))
-					bit = 1;
-				break;
-			}
+			bit = get_fpr32(&current->thread.fpu.fpr[reg], 0) & 0x1;
+			if (insn.i_format.rs == bc1eqz_op)
+				bit = !bit;
 			own_fpu(1);
 			if (bit)
 				epc = epc + 4 +
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 8dfe6a6..e4c21bb 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -28,6 +28,83 @@
 	return res;
 }
 
+/**
+ * calculate_min_delta() - Calculate a good minimum delta for mips_next_event().
+ *
+ * Running under virtualisation can introduce overhead into mips_next_event() in
+ * the form of hypervisor emulation of CP0_Count/CP0_Compare registers,
+ * potentially with an unnatural frequency, which makes a fixed min_delta_ns
+ * value inappropriate as it may be too small.
+ *
+ * It can also introduce occasional latency from the guest being descheduled.
+ *
+ * This function calculates a good minimum delta based roughly on the 75th
+ * percentile of the time taken to do the mips_next_event() sequence, in order
+ * to handle potentially higher overhead while also eliminating outliers due to
+ * unpredictable hypervisor latency (which can be handled by retries).
+ *
+ * Return:	An appropriate minimum delta for the clock event device.
+ */
+static unsigned int calculate_min_delta(void)
+{
+	unsigned int cnt, i, j, k, l;
+	unsigned int buf1[4], buf2[3];
+	unsigned int min_delta;
+
+	/*
+	 * Calculate the median of 5 75th percentiles of 5 samples of how long
+	 * it takes to set CP0_Compare = CP0_Count + delta.
+	 */
+	for (i = 0; i < 5; ++i) {
+		for (j = 0; j < 5; ++j) {
+			/*
+			 * This is like the code in mips_next_event(), and
+			 * directly measures the borderline "safe" delta.
+			 */
+			cnt = read_c0_count();
+			write_c0_compare(cnt);
+			cnt = read_c0_count() - cnt;
+
+			/* Sorted insert into buf1, keeping the 4 smallest */
+			for (k = 0; k < j; ++k) {
+				if (cnt < buf1[k]) {
+					l = min_t(unsigned int,
+						  j, ARRAY_SIZE(buf1) - 1);
+					for (; l > k; --l)
+						buf1[l] = buf1[l - 1];
+					break;
+				}
+			}
+			if (k < ARRAY_SIZE(buf1))
+				buf1[k] = cnt;
+		}
+
+		/* Sorted insert of the 75th percentile, buf1[3], into buf2 */
+		for (k = 0; k < i; ++k) {
+			if (buf1[ARRAY_SIZE(buf1) - 1] < buf2[k]) {
+				l = min_t(unsigned int,
+					  i, ARRAY_SIZE(buf2) - 1);
+				for (; l > k; --l)
+					buf2[l] = buf2[l - 1];
+				break;
+			}
+		}
+		if (k < ARRAY_SIZE(buf2))
+			buf2[k] = buf1[ARRAY_SIZE(buf1) - 1];
+	}
+
+	/* Use 2 * median of 75th percentiles, i.e. 2 * buf2[2] */
+	min_delta = buf2[ARRAY_SIZE(buf2) - 1] * 2;
+
+	/* Don't go below 0x300, the previous fixed minimum delta */
+	if (min_delta < 0x300)
+		min_delta = 0x300;
+
+	pr_debug("%s: median 75th percentile=%#x, min_delta=%#x\n",
+		 __func__, buf2[ARRAY_SIZE(buf2) - 1], min_delta);
+	return min_delta;
+}
+
 DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 int cp0_timer_irq_installed;
 
@@ -177,7 +254,7 @@
 {
 	unsigned int cpu = smp_processor_id();
 	struct clock_event_device *cd;
-	unsigned int irq;
+	unsigned int irq, min_delta;
 
 	if (!cpu_has_counter || !mips_hpt_frequency)
 		return -ENXIO;
@@ -203,7 +280,8 @@
 
 	/* Calculate the min / max delta */
 	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= clockevent_delta2ns(0x300, cd);
+	min_delta		= calculate_min_delta();
+	cd->min_delta_ns	= clockevent_delta2ns(min_delta, cd);
 
 	cd->rating		= 300;
 	cd->irq			= irq;
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index ac81edd..51b98dc 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -18,9 +18,12 @@
 #include <asm/mipsmtregs.h>
 #include <asm/pm.h>
 
+#define GCR_CPC_BASE_OFS	0x0088
 #define GCR_CL_COHERENCE_OFS	0x2008
 #define GCR_CL_ID_OFS		0x2028
 
+#define CPC_CL_VC_RUN_OFS	0x2028
+
 .extern mips_cm_base
 
 .set noreorder
@@ -60,6 +63,37 @@
 	 nop
 	.endm
 
+	/*
+	 * Set dest to non-zero if the core supports MIPSr6 multithreading
+	 * (ie. VPs), else zero. If MIPSr6 multithreading is not supported then
+	 * branch to nomt.
+	 */
+	.macro	has_vp	dest, nomt
+	mfc0	\dest, CP0_CONFIG, 1
+	bgez	\dest, \nomt
+	 mfc0	\dest, CP0_CONFIG, 2
+	bgez	\dest, \nomt
+	 mfc0	\dest, CP0_CONFIG, 3
+	bgez	\dest, \nomt
+	 mfc0	\dest, CP0_CONFIG, 4
+	bgez	\dest, \nomt
+	 mfc0	\dest, CP0_CONFIG, 5
+	andi	\dest, \dest, MIPS_CONF5_VP
+	beqz	\dest, \nomt
+	 nop
+	.endm
+
+	/* Calculate an uncached address for the CM GCRs */
+	.macro	cmgcrb	dest
+	.set	push
+	.set	noat
+	MFC0	$1, CP0_CMGCRBASE
+	PTR_SLL	$1, $1, 4
+	PTR_LI	\dest, UNCAC_BASE
+	PTR_ADDU \dest, \dest, $1
+	.set	pop
+	.endm
+
 .section .text.cps-vec
 .balign 0x1000
 
@@ -90,120 +124,64 @@
 	li	t0, ST0_CU1 | ST0_CU0 | ST0_BEV | STATUS_BITDEPS
 	mtc0	t0, CP0_STATUS
 
-	/*
-	 * Clear the bits used to index the caches. Note that the architecture
-	 * dictates that writing to any of TagLo or TagHi selects 0 or 2 should
-	 * be valid for all MIPS32 CPUs, even those for which said writes are
-	 * unnecessary.
-	 */
-	mtc0	zero, CP0_TAGLO, 0
-	mtc0	zero, CP0_TAGHI, 0
-	mtc0	zero, CP0_TAGLO, 2
-	mtc0	zero, CP0_TAGHI, 2
-	ehb
-
-	/* Primary cache configuration is indicated by Config1 */
-	mfc0	v0, CP0_CONFIG, 1
-
-	/* Detect I-cache line size */
-	_EXT	t0, v0, MIPS_CONF1_IL_SHF, MIPS_CONF1_IL_SZ
-	beqz	t0, icache_done
-	 li	t1, 2
-	sllv	t0, t1, t0
-
-	/* Detect I-cache size */
-	_EXT	t1, v0, MIPS_CONF1_IS_SHF, MIPS_CONF1_IS_SZ
-	xori	t2, t1, 0x7
-	beqz	t2, 1f
-	 li	t3, 32
-	addiu	t1, t1, 1
-	sllv	t1, t3, t1
-1:	/* At this point t1 == I-cache sets per way */
-	_EXT	t2, v0, MIPS_CONF1_IA_SHF, MIPS_CONF1_IA_SZ
-	addiu	t2, t2, 1
-	mul	t1, t1, t0
-	mul	t1, t1, t2
-
-	li	a0, CKSEG0
-	PTR_ADD	a1, a0, t1
-1:	cache	Index_Store_Tag_I, 0(a0)
-	PTR_ADD	a0, a0, t0
-	bne	a0, a1, 1b
+	/* Skip cache & coherence setup if we're already coherent */
+	cmgcrb	v1
+	lw	s7, GCR_CL_COHERENCE_OFS(v1)
+	bnez	s7, 1f
 	 nop
-icache_done:
 
-	/* Detect D-cache line size */
-	_EXT	t0, v0, MIPS_CONF1_DL_SHF, MIPS_CONF1_DL_SZ
-	beqz	t0, dcache_done
-	 li	t1, 2
-	sllv	t0, t1, t0
-
-	/* Detect D-cache size */
-	_EXT	t1, v0, MIPS_CONF1_DS_SHF, MIPS_CONF1_DS_SZ
-	xori	t2, t1, 0x7
-	beqz	t2, 1f
-	 li	t3, 32
-	addiu	t1, t1, 1
-	sllv	t1, t3, t1
-1:	/* At this point t1 == D-cache sets per way */
-	_EXT	t2, v0, MIPS_CONF1_DA_SHF, MIPS_CONF1_DA_SZ
-	addiu	t2, t2, 1
-	mul	t1, t1, t0
-	mul	t1, t1, t2
-
-	li	a0, CKSEG0
-	PTR_ADDU a1, a0, t1
-	PTR_SUBU a1, a1, t0
-1:	cache	Index_Store_Tag_D, 0(a0)
-	bne	a0, a1, 1b
-	 PTR_ADD a0, a0, t0
-dcache_done:
-
-	/* Set Kseg0 CCA to that in s0 */
-	mfc0	t0, CP0_CONFIG
-	ori	t0, 0x7
-	xori	t0, 0x7
-	or	t0, t0, s0
-	mtc0	t0, CP0_CONFIG
-	ehb
-
-	/* Calculate an uncached address for the CM GCRs */
-	MFC0	v1, CP0_CMGCRBASE
-	PTR_SLL	v1, v1, 4
-	PTR_LI	t0, UNCAC_BASE
-	PTR_ADDU v1, v1, t0
+	/* Initialize the L1 caches */
+	jal	mips_cps_cache_init
+	 nop
 
 	/* Enter the coherent domain */
 	li	t0, 0xff
 	sw	t0, GCR_CL_COHERENCE_OFS(v1)
 	ehb
 
+	/* Set Kseg0 CCA to that in s0 */
+1:	mfc0	t0, CP0_CONFIG
+	ori	t0, 0x7
+	xori	t0, 0x7
+	or	t0, t0, s0
+	mtc0	t0, CP0_CONFIG
+	ehb
+
 	/* Jump to kseg0 */
 	PTR_LA	t0, 1f
 	jr	t0
 	 nop
 
 	/*
-	 * We're up, cached & coherent. Perform any further required core-level
-	 * initialisation.
+	 * We're up, cached & coherent. Perform any EVA initialization necessary
+	 * before we access memory.
 	 */
-1:	jal	mips_cps_core_init
+1:	eva_init
+
+	/* Retrieve boot configuration pointers */
+	jal	mips_cps_get_bootcfg
 	 nop
 
-	/* Do any EVA initialization if necessary */
-	eva_init
+	/* Skip core-level init if we started up coherent */
+	bnez	s7, 1f
+	 nop
+
+	/* Perform any further required core-level initialisation */
+	jal	mips_cps_core_init
+	 nop
 
 	/*
 	 * Boot any other VPEs within this core that should be online, and
 	 * deactivate this VPE if it should be offline.
 	 */
+	move	a1, t9
 	jal	mips_cps_boot_vpes
-	 nop
+	 move	a0, v0
 
 	/* Off we go! */
-	PTR_L	t1, VPEBOOTCFG_PC(v0)
-	PTR_L	gp, VPEBOOTCFG_GP(v0)
-	PTR_L	sp, VPEBOOTCFG_SP(v0)
+1:	PTR_L	t1, VPEBOOTCFG_PC(v1)
+	PTR_L	gp, VPEBOOTCFG_GP(v1)
+	PTR_L	sp, VPEBOOTCFG_SP(v1)
 	jr	t1
 	 nop
 	END(mips_cps_core_entry)
@@ -245,7 +223,6 @@
 
 .org 0x480
 LEAF(excep_ejtag)
-	DUMP_EXCEP("EJTAG")
 	PTR_LA	k0, ejtag_debug_handler
 	jr	k0
 	 nop
@@ -323,22 +300,35 @@
 	 nop
 	END(mips_cps_core_init)
 
-LEAF(mips_cps_boot_vpes)
-	/* Retrieve CM base address */
-	PTR_LA	t0, mips_cm_base
-	PTR_L	t0, 0(t0)
-
+/**
+ * mips_cps_get_bootcfg() - retrieve boot configuration pointers
+ *
+ * Returns: pointer to struct core_boot_config in v0, pointer to
+ *          struct vpe_boot_config in v1, VPE ID in t9
+ */
+LEAF(mips_cps_get_bootcfg)
 	/* Calculate a pointer to this cores struct core_boot_config */
+	cmgcrb	t0
 	lw	t0, GCR_CL_ID_OFS(t0)
 	li	t1, COREBOOTCFG_SIZE
 	mul	t0, t0, t1
 	PTR_LA	t1, mips_cps_core_bootcfg
 	PTR_L	t1, 0(t1)
-	PTR_ADDU t0, t0, t1
+	PTR_ADDU v0, t0, t1
 
 	/* Calculate this VPEs ID. If the core doesn't support MT use 0 */
 	li	t9, 0
-#ifdef CONFIG_MIPS_MT_SMP
+#if defined(CONFIG_CPU_MIPSR6)
+	has_vp	ta2, 1f
+
+	/*
+	 * Assume non-contiguous numbering. Perhaps some day we'll need
+	 * to handle contiguous VP numbering, but no such systems yet
+	 * exist.
+	 */
+	mfc0	t9, $3, 1
+	andi	t9, t9, 0xff
+#elif defined(CONFIG_MIPS_MT_SMP)
 	has_mt	ta2, 1f
 
 	/* Find the number of VPEs present in the core */
@@ -362,22 +352,43 @@
 
 1:	/* Calculate a pointer to this VPEs struct vpe_boot_config */
 	li	t1, VPEBOOTCFG_SIZE
-	mul	v0, t9, t1
-	PTR_L	ta3, COREBOOTCFG_VPECONFIG(t0)
-	PTR_ADDU v0, v0, ta3
+	mul	v1, t9, t1
+	PTR_L	ta3, COREBOOTCFG_VPECONFIG(v0)
+	PTR_ADDU v1, v1, ta3
 
-#ifdef CONFIG_MIPS_MT_SMP
-
-	/* If the core doesn't support MT then return */
-	bnez	ta2, 1f
-	 nop
 	jr	ra
 	 nop
+	END(mips_cps_get_bootcfg)
+
+LEAF(mips_cps_boot_vpes)
+	PTR_L	ta2, COREBOOTCFG_VPEMASK(a0)
+	PTR_L	ta3, COREBOOTCFG_VPECONFIG(a0)
+
+#if defined(CONFIG_CPU_MIPSR6)
+
+	has_vp	t0, 5f
+
+	/* Find base address of CPC */
+	cmgcrb	t3
+	PTR_L	t1, GCR_CPC_BASE_OFS(t3)
+	PTR_LI	t2, ~0x7fff
+	and	t1, t1, t2
+	PTR_LI	t2, UNCAC_BASE
+	PTR_ADD	t1, t1, t2
+
+	/* Set VC_RUN to the VPE mask */
+	PTR_S	ta2, CPC_CL_VC_RUN_OFS(t1)
+	ehb
+
+#elif defined(CONFIG_MIPS_MT)
 
 	.set	push
 	.set	mt
 
-1:	/* Enter VPE configuration state */
+	/* If the core doesn't support MT then return */
+	has_mt	t0, 5f
+
+	/* Enter VPE configuration state */
 	dvpe
 	PTR_LA	t1, 1f
 	jr.hb	t1
@@ -388,7 +399,6 @@
 	ehb
 
 	/* Loop through each VPE */
-	PTR_L	ta2, COREBOOTCFG_VPEMASK(t0)
 	move	t8, ta2
 	li	ta1, 0
 
@@ -465,7 +475,7 @@
 
 	/* Check whether this VPE is meant to be running */
 	li	t0, 1
-	sll	t0, t0, t9
+	sll	t0, t0, a1
 	and	t0, t0, t8
 	bnez	t0, 2f
 	 nop
@@ -482,10 +492,84 @@
 #endif /* CONFIG_MIPS_MT_SMP */
 
 	/* Return */
-	jr	ra
+5:	jr	ra
 	 nop
 	END(mips_cps_boot_vpes)
 
+LEAF(mips_cps_cache_init)
+	/*
+	 * Clear the bits used to index the caches. Note that the architecture
+	 * dictates that writing to any of TagLo or TagHi selects 0 or 2 should
+	 * be valid for all MIPS32 CPUs, even those for which said writes are
+	 * unnecessary.
+	 */
+	mtc0	zero, CP0_TAGLO, 0
+	mtc0	zero, CP0_TAGHI, 0
+	mtc0	zero, CP0_TAGLO, 2
+	mtc0	zero, CP0_TAGHI, 2
+	ehb
+
+	/* Primary cache configuration is indicated by Config1 */
+	mfc0	v0, CP0_CONFIG, 1
+
+	/* Detect I-cache line size */
+	_EXT	t0, v0, MIPS_CONF1_IL_SHF, MIPS_CONF1_IL_SZ
+	beqz	t0, icache_done
+	 li	t1, 2
+	sllv	t0, t1, t0
+
+	/* Detect I-cache size */
+	_EXT	t1, v0, MIPS_CONF1_IS_SHF, MIPS_CONF1_IS_SZ
+	xori	t2, t1, 0x7
+	beqz	t2, 1f
+	 li	t3, 32
+	addiu	t1, t1, 1
+	sllv	t1, t3, t1
+1:	/* At this point t1 == I-cache sets per way */
+	_EXT	t2, v0, MIPS_CONF1_IA_SHF, MIPS_CONF1_IA_SZ
+	addiu	t2, t2, 1
+	mul	t1, t1, t0
+	mul	t1, t1, t2
+
+	li	a0, CKSEG0
+	PTR_ADD	a1, a0, t1
+1:	cache	Index_Store_Tag_I, 0(a0)
+	PTR_ADD	a0, a0, t0
+	bne	a0, a1, 1b
+	 nop
+icache_done:
+
+	/* Detect D-cache line size */
+	_EXT	t0, v0, MIPS_CONF1_DL_SHF, MIPS_CONF1_DL_SZ
+	beqz	t0, dcache_done
+	 li	t1, 2
+	sllv	t0, t1, t0
+
+	/* Detect D-cache size */
+	_EXT	t1, v0, MIPS_CONF1_DS_SHF, MIPS_CONF1_DS_SZ
+	xori	t2, t1, 0x7
+	beqz	t2, 1f
+	 li	t3, 32
+	addiu	t1, t1, 1
+	sllv	t1, t3, t1
+1:	/* At this point t1 == D-cache sets per way */
+	_EXT	t2, v0, MIPS_CONF1_DA_SHF, MIPS_CONF1_DA_SZ
+	addiu	t2, t2, 1
+	mul	t1, t1, t0
+	mul	t1, t1, t2
+
+	li	a0, CKSEG0
+	PTR_ADDU a1, a0, t1
+	PTR_SUBU a1, a1, t0
+1:	cache	Index_Store_Tag_D, 0(a0)
+	bne	a0, a1, 1b
+	 PTR_ADD a0, a0, t0
+dcache_done:
+
+	jr	ra
+	 nop
+	END(mips_cps_cache_init)
+
 #if defined(CONFIG_MIPS_CPS_PM) && defined(CONFIG_CPU_PM)
 
 	/* Calculate a pointer to this CPUs struct mips_static_suspend_state */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index b725b71..5ac5c3e 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -539,6 +539,7 @@
 	switch (c->cputype) {
 	case CPU_PROAPTIV:
 	case CPU_P5600:
+	case CPU_P6600:
 		/* proAptiv & related cores use Config6 to enable the FTLB */
 		config = read_c0_config6();
 		/* Clear the old probability value */
@@ -561,6 +562,19 @@
 		write_c0_config7(config | (calculate_ftlb_probability(c)
 					   << MIPS_CONF7_FTLBP_SHIFT));
 		break;
+	case CPU_LOONGSON3:
+		/* Flush ITLB, DTLB, VTLB and FTLB */
+		write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB |
+			      LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB);
+		/* Loongson-3 cores use Config6 to enable the FTLB */
+		config = read_c0_config6();
+		if (enable)
+			/* Enable FTLB */
+			write_c0_config6(config & ~MIPS_CONF6_FTLBDIS);
+		else
+			/* Disable FTLB */
+			write_c0_config6(config | MIPS_CONF6_FTLBDIS);
+		break;
 	default:
 		return 1;
 	}
@@ -634,6 +648,8 @@
 
 	if (config1 & MIPS_CONF1_MD)
 		c->ases |= MIPS_ASE_MDMX;
+	if (config1 & MIPS_CONF1_PC)
+		c->options |= MIPS_CPU_PERF;
 	if (config1 & MIPS_CONF1_WR)
 		c->options |= MIPS_CPU_WATCH;
 	if (config1 & MIPS_CONF1_CA)
@@ -673,18 +689,25 @@
 
 	if (config3 & MIPS_CONF3_SM) {
 		c->ases |= MIPS_ASE_SMARTMIPS;
-		c->options |= MIPS_CPU_RIXI;
+		c->options |= MIPS_CPU_RIXI | MIPS_CPU_CTXTC;
 	}
 	if (config3 & MIPS_CONF3_RXI)
 		c->options |= MIPS_CPU_RIXI;
+	if (config3 & MIPS_CONF3_CTXTC)
+		c->options |= MIPS_CPU_CTXTC;
 	if (config3 & MIPS_CONF3_DSP)
 		c->ases |= MIPS_ASE_DSP;
-	if (config3 & MIPS_CONF3_DSP2P)
+	if (config3 & MIPS_CONF3_DSP2P) {
 		c->ases |= MIPS_ASE_DSP2P;
+		if (cpu_has_mips_r6)
+			c->ases |= MIPS_ASE_DSP3;
+	}
 	if (config3 & MIPS_CONF3_VINT)
 		c->options |= MIPS_CPU_VINT;
 	if (config3 & MIPS_CONF3_VEIC)
 		c->options |= MIPS_CPU_VEIC;
+	if (config3 & MIPS_CONF3_LPA)
+		c->options |= MIPS_CPU_LPA;
 	if (config3 & MIPS_CONF3_MT)
 		c->ases |= MIPS_ASE_MIPSMT;
 	if (config3 & MIPS_CONF3_ULRI)
@@ -695,6 +718,10 @@
 		c->ases |= MIPS_ASE_VZ;
 	if (config3 & MIPS_CONF3_SC)
 		c->options |= MIPS_CPU_SEGMENTS;
+	if (config3 & MIPS_CONF3_BI)
+		c->options |= MIPS_CPU_BADINSTR;
+	if (config3 & MIPS_CONF3_BP)
+		c->options |= MIPS_CPU_BADINSTRP;
 	if (config3 & MIPS_CONF3_MSA)
 		c->ases |= MIPS_ASE_MSA;
 	if (config3 & MIPS_CONF3_PW) {
@@ -715,6 +742,7 @@
 	unsigned int newcf4;
 	unsigned int mmuextdef;
 	unsigned int ftlb_page = MIPS_CONF4_FTLBPAGESIZE;
+	unsigned long asid_mask;
 
 	config4 = read_c0_config4();
 
@@ -773,7 +801,20 @@
 		}
 	}
 
-	c->kscratch_mask = (config4 >> 16) & 0xff;
+	c->kscratch_mask = (config4 & MIPS_CONF4_KSCREXIST)
+				>> MIPS_CONF4_KSCREXIST_SHIFT;
+
+	asid_mask = MIPS_ENTRYHI_ASID;
+	if (config4 & MIPS_CONF4_AE)
+		asid_mask |= MIPS_ENTRYHI_ASIDX;
+	set_cpu_asid_mask(c, asid_mask);
+
+	/*
+	 * Warn if the computed ASID mask doesn't match the mask the kernel
+	 * is built for. This may indicate either a serious problem or an
+	 * easy optimisation opportunity, but either way should be addressed.
+	 */
+	WARN_ON(asid_mask != cpu_asid_mask(c));
 
 	return config4 & MIPS_CONF_M;
 }
@@ -796,6 +837,8 @@
 	if (config5 & MIPS_CONF5_MVH)
 		c->options |= MIPS_CPU_XPA;
 #endif
+	if (cpu_has_mips_r6 && (config5 & MIPS_CONF5_VP))
+		c->options |= MIPS_CPU_VP;
 
 	return config5 & MIPS_CONF_M;
 }
@@ -826,17 +869,43 @@
 	if (ok)
 		ok = decode_config5(c);
 
-	mips_probe_watch_registers(c);
+	/* Probe the EBase.WG bit */
+	if (cpu_has_mips_r2_r6) {
+		u64 ebase;
+		unsigned int status;
 
-	if (cpu_has_rixi) {
-		/* Enable the RIXI exceptions */
-		set_c0_pagegrain(PG_IEC);
-		back_to_back_c0_hazard();
-		/* Verify the IEC bit is set */
-		if (read_c0_pagegrain() & PG_IEC)
-			c->options |= MIPS_CPU_RIXIEX;
+		/* {read,write}_c0_ebase_64() may be UNDEFINED prior to r6 */
+		ebase = cpu_has_mips64r6 ? read_c0_ebase_64()
+					 : (s32)read_c0_ebase();
+		if (ebase & MIPS_EBASE_WG) {
+			/* WG bit already set, we can avoid the clumsy probe */
+			c->options |= MIPS_CPU_EBASE_WG;
+		} else {
+			/* It's UNDEFINED to change EBase while BEV=0 */
+			status = read_c0_status();
+			write_c0_status(status | ST0_BEV);
+			irq_enable_hazard();
+			/*
+			 * On pre-r6 cores, this may well clobber the upper bits
+			 * of EBase. This is hard to avoid without potentially
+			 * hitting UNDEFINED dm*c0 behaviour if EBase is 32-bit.
+			 */
+			if (cpu_has_mips64r6)
+				write_c0_ebase_64(ebase | MIPS_EBASE_WG);
+			else
+				write_c0_ebase(ebase | MIPS_EBASE_WG);
+			back_to_back_c0_hazard();
+			/* Restore BEV */
+			write_c0_status(status);
+			if (read_c0_ebase() & MIPS_EBASE_WG) {
+				c->options |= MIPS_CPU_EBASE_WG;
+				write_c0_ebase(ebase);
+			}
+		}
 	}
 
+	mips_probe_watch_registers(c);
+
 #ifndef CONFIG_MIPS_CPS
 	if (cpu_has_mips_r2_r6) {
 		c->core = get_ebase_cpunum();
@@ -846,6 +915,235 @@
 #endif
 }
 
+/*
+ * Probe for certain guest capabilities by writing config bits and reading back.
+ * Finally write back the original value.
+ */
+#define probe_gc0_config(name, maxconf, bits)				\
+do {									\
+	unsigned int tmp;						\
+	tmp = read_gc0_##name();					\
+	write_gc0_##name(tmp | (bits));					\
+	back_to_back_c0_hazard();					\
+	maxconf = read_gc0_##name();					\
+	write_gc0_##name(tmp);						\
+} while (0)
+
+/*
+ * Probe for dynamic guest capabilities by changing certain config bits and
+ * reading back to see if they change. Finally write back the original value.
+ */
+#define probe_gc0_config_dyn(name, maxconf, dynconf, bits)		\
+do {									\
+	maxconf = read_gc0_##name();					\
+	write_gc0_##name(maxconf ^ (bits));				\
+	back_to_back_c0_hazard();					\
+	dynconf = maxconf ^ read_gc0_##name();				\
+	write_gc0_##name(maxconf);					\
+	maxconf |= dynconf;						\
+} while (0)
+
+static inline unsigned int decode_guest_config0(struct cpuinfo_mips *c)
+{
+	unsigned int config0;
+
+	probe_gc0_config(config, config0, MIPS_CONF_M);
+
+	if (config0 & MIPS_CONF_M)
+		c->guest.conf |= BIT(1);
+	return config0 & MIPS_CONF_M;
+}
+
+static inline unsigned int decode_guest_config1(struct cpuinfo_mips *c)
+{
+	unsigned int config1, config1_dyn;
+
+	probe_gc0_config_dyn(config1, config1, config1_dyn,
+			     MIPS_CONF_M | MIPS_CONF1_PC | MIPS_CONF1_WR |
+			     MIPS_CONF1_FP);
+
+	if (config1 & MIPS_CONF1_FP)
+		c->guest.options |= MIPS_CPU_FPU;
+	if (config1_dyn & MIPS_CONF1_FP)
+		c->guest.options_dyn |= MIPS_CPU_FPU;
+
+	if (config1 & MIPS_CONF1_WR)
+		c->guest.options |= MIPS_CPU_WATCH;
+	if (config1_dyn & MIPS_CONF1_WR)
+		c->guest.options_dyn |= MIPS_CPU_WATCH;
+
+	if (config1 & MIPS_CONF1_PC)
+		c->guest.options |= MIPS_CPU_PERF;
+	if (config1_dyn & MIPS_CONF1_PC)
+		c->guest.options_dyn |= MIPS_CPU_PERF;
+
+	if (config1 & MIPS_CONF_M)
+		c->guest.conf |= BIT(2);
+	return config1 & MIPS_CONF_M;
+}
+
+static inline unsigned int decode_guest_config2(struct cpuinfo_mips *c)
+{
+	unsigned int config2;
+
+	probe_gc0_config(config2, config2, MIPS_CONF_M);
+
+	if (config2 & MIPS_CONF_M)
+		c->guest.conf |= BIT(3);
+	return config2 & MIPS_CONF_M;
+}
+
+static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c)
+{
+	unsigned int config3, config3_dyn;
+
+	probe_gc0_config_dyn(config3, config3, config3_dyn,
+			     MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_CTXTC);
+
+	if (config3 & MIPS_CONF3_CTXTC)
+		c->guest.options |= MIPS_CPU_CTXTC;
+	if (config3_dyn & MIPS_CONF3_CTXTC)
+		c->guest.options_dyn |= MIPS_CPU_CTXTC;
+
+	if (config3 & MIPS_CONF3_PW)
+		c->guest.options |= MIPS_CPU_HTW;
+
+	if (config3 & MIPS_CONF3_SC)
+		c->guest.options |= MIPS_CPU_SEGMENTS;
+
+	if (config3 & MIPS_CONF3_BI)
+		c->guest.options |= MIPS_CPU_BADINSTR;
+	if (config3 & MIPS_CONF3_BP)
+		c->guest.options |= MIPS_CPU_BADINSTRP;
+
+	if (config3 & MIPS_CONF3_MSA)
+		c->guest.ases |= MIPS_ASE_MSA;
+	if (config3_dyn & MIPS_CONF3_MSA)
+		c->guest.ases_dyn |= MIPS_ASE_MSA;
+
+	if (config3 & MIPS_CONF_M)
+		c->guest.conf |= BIT(4);
+	return config3 & MIPS_CONF_M;
+}
+
+static inline unsigned int decode_guest_config4(struct cpuinfo_mips *c)
+{
+	unsigned int config4;
+
+	probe_gc0_config(config4, config4,
+			 MIPS_CONF_M | MIPS_CONF4_KSCREXIST);
+
+	c->guest.kscratch_mask = (config4 & MIPS_CONF4_KSCREXIST)
+				>> MIPS_CONF4_KSCREXIST_SHIFT;
+
+	if (config4 & MIPS_CONF_M)
+		c->guest.conf |= BIT(5);
+	return config4 & MIPS_CONF_M;
+}
+
+static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c)
+{
+	unsigned int config5, config5_dyn;
+
+	probe_gc0_config_dyn(config5, config5, config5_dyn,
+			 MIPS_CONF_M | MIPS_CONF5_MRP);
+
+	if (config5 & MIPS_CONF5_MRP)
+		c->guest.options |= MIPS_CPU_MAAR;
+	if (config5_dyn & MIPS_CONF5_MRP)
+		c->guest.options_dyn |= MIPS_CPU_MAAR;
+
+	if (config5 & MIPS_CONF5_LLB)
+		c->guest.options |= MIPS_CPU_RW_LLB;
+
+	if (config5 & MIPS_CONF_M)
+		c->guest.conf |= BIT(6);
+	return config5 & MIPS_CONF_M;
+}
+
+static inline void decode_guest_configs(struct cpuinfo_mips *c)
+{
+	unsigned int ok;
+
+	ok = decode_guest_config0(c);
+	if (ok)
+		ok = decode_guest_config1(c);
+	if (ok)
+		ok = decode_guest_config2(c);
+	if (ok)
+		ok = decode_guest_config3(c);
+	if (ok)
+		ok = decode_guest_config4(c);
+	if (ok)
+		decode_guest_config5(c);
+}
+
+static inline void cpu_probe_guestctl0(struct cpuinfo_mips *c)
+{
+	unsigned int guestctl0, temp;
+
+	guestctl0 = read_c0_guestctl0();
+
+	if (guestctl0 & MIPS_GCTL0_G0E)
+		c->options |= MIPS_CPU_GUESTCTL0EXT;
+	if (guestctl0 & MIPS_GCTL0_G1)
+		c->options |= MIPS_CPU_GUESTCTL1;
+	if (guestctl0 & MIPS_GCTL0_G2)
+		c->options |= MIPS_CPU_GUESTCTL2;
+	if (!(guestctl0 & MIPS_GCTL0_RAD)) {
+		c->options |= MIPS_CPU_GUESTID;
+
+		/*
+		 * Probe for Direct Root to Guest (DRG). Set GuestCtl1.RID = 0
+		 * first, otherwise all data accesses will be fully virtualised
+		 * as if they were performed by guest mode.
+		 */
+		write_c0_guestctl1(0);
+		tlbw_use_hazard();
+
+		write_c0_guestctl0(guestctl0 | MIPS_GCTL0_DRG);
+		back_to_back_c0_hazard();
+		temp = read_c0_guestctl0();
+
+		if (temp & MIPS_GCTL0_DRG) {
+			write_c0_guestctl0(guestctl0);
+			c->options |= MIPS_CPU_DRG;
+		}
+	}
+}
+
+static inline void cpu_probe_guestctl1(struct cpuinfo_mips *c)
+{
+	if (cpu_has_guestid) {
+		/* determine which GuestID bits are implemented (writable) */
+		write_c0_guestctl1(MIPS_GCTL1_ID);
+		back_to_back_c0_hazard();
+		c->guestid_mask = (read_c0_guestctl1() & MIPS_GCTL1_ID)
+						>> MIPS_GCTL1_ID_SHIFT;
+		write_c0_guestctl1(0);
+	}
+}
+
+static inline void cpu_probe_gtoffset(struct cpuinfo_mips *c)
+{
+	/* determine which GTOffset bits are implemented (writable) */
+	write_c0_gtoffset(0xffffffff);
+	back_to_back_c0_hazard();
+	c->gtoffset_mask = read_c0_gtoffset();
+	write_c0_gtoffset(0);
+}
+
+static inline void cpu_probe_vz(struct cpuinfo_mips *c)
+{
+	cpu_probe_guestctl0(c);
+	if (cpu_has_guestctl1)
+		cpu_probe_guestctl1(c);
+
+	cpu_probe_gtoffset(c);
+
+	decode_guest_configs(c);
+}
+
 #define R4K_OPTS (MIPS_CPU_TLB | MIPS_CPU_4KEX | MIPS_CPU_4K_CACHE \
 		| MIPS_CPU_COUNTER)
 
@@ -1172,7 +1470,7 @@
 			set_isa(c, MIPS_CPU_ISA_III);
 			c->fpu_msk31 |= FPU_CSR_CONDX;
 			break;
-		case PRID_REV_LOONGSON3A:
+		case PRID_REV_LOONGSON3A_R1:
 			c->cputype = CPU_LOONGSON3;
 			__cpu_name[cpu] = "ICT Loongson-3";
 			set_elf_platform(cpu, "loongson3a");
@@ -1314,6 +1612,10 @@
 		c->cputype = CPU_P5600;
 		__cpu_name[cpu] = "MIPS P5600";
 		break;
+	case PRID_IMP_P6600:
+		c->cputype = CPU_P6600;
+		__cpu_name[cpu] = "MIPS P6600";
+		break;
 	case PRID_IMP_I6400:
 		c->cputype = CPU_I6400;
 		__cpu_name[cpu] = "MIPS I6400";
@@ -1322,6 +1624,10 @@
 		c->cputype = CPU_M5150;
 		__cpu_name[cpu] = "MIPS M5150";
 		break;
+	case PRID_IMP_M6250:
+		c->cputype = CPU_M6250;
+		__cpu_name[cpu] = "MIPS M6250";
+		break;
 	}
 
 	decode_configs(c);
@@ -1435,6 +1741,7 @@
 			c->cputype = CPU_BMIPS4380;
 			__cpu_name[cpu] = "Broadcom BMIPS4380";
 			set_elf_platform(cpu, "bmips4380");
+			c->options |= MIPS_CPU_RIXI;
 		} else {
 			c->cputype = CPU_BMIPS4350;
 			__cpu_name[cpu] = "Broadcom BMIPS4350";
@@ -1445,9 +1752,12 @@
 	case PRID_IMP_BMIPS5000:
 	case PRID_IMP_BMIPS5200:
 		c->cputype = CPU_BMIPS5000;
-		__cpu_name[cpu] = "Broadcom BMIPS5000";
+		if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_BMIPS5200)
+			__cpu_name[cpu] = "Broadcom BMIPS5200";
+		else
+			__cpu_name[cpu] = "Broadcom BMIPS5000";
 		set_elf_platform(cpu, "bmips5000");
-		c->options |= MIPS_CPU_ULRI;
+		c->options |= MIPS_CPU_ULRI | MIPS_CPU_RIXI;
 		break;
 	}
 }
@@ -1481,6 +1791,8 @@
 		set_elf_platform(cpu, "octeon2");
 		break;
 	case PRID_IMP_CAVIUM_CN70XX:
+	case PRID_IMP_CAVIUM_CN73XX:
+	case PRID_IMP_CAVIUM_CNF75XX:
 	case PRID_IMP_CAVIUM_CN78XX:
 		c->cputype = CPU_CAVIUM_OCTEON3;
 		__cpu_name[cpu] = "Cavium Octeon III";
@@ -1493,6 +1805,29 @@
 	}
 }
 
+static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
+{
+	switch (c->processor_id & PRID_IMP_MASK) {
+	case PRID_IMP_LOONGSON_64:  /* Loongson-2/3 */
+		switch (c->processor_id & PRID_REV_MASK) {
+		case PRID_REV_LOONGSON3A_R2:
+			c->cputype = CPU_LOONGSON3;
+			__cpu_name[cpu] = "ICT Loongson-3";
+			set_elf_platform(cpu, "loongson3a");
+			set_isa(c, MIPS_CPU_ISA_M64R2);
+			break;
+		}
+
+		decode_configs(c);
+		c->options |= MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
+		c->writecombine = _CACHE_UNCACHED_ACCELERATED;
+		break;
+	default:
+		panic("Unknown Loongson Processor ID!");
+		break;
+	}
+}
+
 static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
 {
 	decode_configs(c);
@@ -1640,6 +1975,9 @@
 	case PRID_COMP_CAVIUM:
 		cpu_probe_cavium(c, cpu);
 		break;
+	case PRID_COMP_LOONGSON:
+		cpu_probe_loongson(c, cpu);
+		break;
 	case PRID_COMP_INGENIC_D0:
 	case PRID_COMP_INGENIC_D1:
 	case PRID_COMP_INGENIC_E1:
@@ -1660,6 +1998,15 @@
 	 */
 	BUG_ON(current_cpu_type() != c->cputype);
 
+	if (cpu_has_rixi) {
+		/* Enable the RIXI exceptions */
+		set_c0_pagegrain(PG_IEC);
+		back_to_back_c0_hazard();
+		/* Verify the IEC bit is set */
+		if (read_c0_pagegrain() & PG_IEC)
+			c->options |= MIPS_CPU_RIXIEX;
+	}
+
 	if (mips_fpu_disabled)
 		c->options &= ~MIPS_CPU_FPU;
 
@@ -1699,6 +2046,9 @@
 		elf_hwcap |= HWCAP_MIPS_MSA;
 	}
 
+	if (cpu_has_vz)
+		cpu_probe_vz(c);
+
 	cpu_probe_vmbits(c);
 
 #ifdef CONFIG_64BIT
diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
index d434d5d..610f0f3 100644
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -14,12 +14,22 @@
 static cpumask_t cpus_in_crash = CPU_MASK_NONE;
 
 #ifdef CONFIG_SMP
-static void crash_shutdown_secondary(void *ignore)
+static void crash_shutdown_secondary(void *passed_regs)
 {
-	struct pt_regs *regs;
+	struct pt_regs *regs = passed_regs;
 	int cpu = smp_processor_id();
 
-	regs = task_pt_regs(current);
+	/*
+	 * If we are passed registers, use those.  Otherwise get the
+	 * regs from the last interrupt, which should be correct, as
+	 * we are in an interrupt.  But if the regs are not there,
+	 * pull them from the top of the stack.  They are probably
+	 * wrong, but we need something to keep from crashing again.
+	 */
+	if (!regs)
+		regs = get_irq_regs();
+	if (!regs)
+		regs = task_pt_regs(current);
 
 	if (!cpu_online(cpu))
 		return;
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index baa7b6f..17326a9 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -130,7 +130,7 @@
 	/* end of rollback region (the region size must be power of two) */
 1:
 	jr	ra
-	nop
+	 nop
 	.set	pop
 	END(__r4k_wait)
 
@@ -172,7 +172,7 @@
 	mfc0	k0, CP0_EPC
 	.set	noreorder
 	j	k0
-	rfe
+	 rfe
 #else
 	and	k0, ST0_IE
 	bnez	k0, 1f
@@ -189,7 +189,7 @@
 	LONG_L	s0, TI_REGS($28)
 	LONG_S	sp, TI_REGS($28)
 	PTR_LA	ra, ret_from_irq
-	PTR_LA  v0, plat_irq_dispatch
+	PTR_LA	v0, plat_irq_dispatch
 	jr	v0
 #ifdef CONFIG_CPU_MICROMIPS
 	nop
@@ -292,7 +292,7 @@
 	MFC0	k0, CP0_DESAVE
 	.set	mips32
 	deret
-	.set pop
+	.set	pop
 	END(ejtag_debug_handler)
 
 /*
@@ -329,10 +329,10 @@
 	 * Clear BEV - required for page fault exception handler to work
 	 */
 	mfc0	k0, CP0_STATUS
-	ori     k0, k0, ST0_EXL
+	ori	k0, k0, ST0_EXL
 	li	k1, ~(ST0_BEV | ST0_ERL)
-	and     k0, k0, k1
-	mtc0    k0, CP0_STATUS
+	and	k0, k0, k1
+	mtc0	k0, CP0_STATUS
 	_ehb
 	SAVE_ALL
 	move	a0, sp
@@ -396,7 +396,7 @@
 
 	.macro	__BUILD_count exception
 	LONG_L	t0,exception_count_\exception
-	LONG_ADDIU t0, 1
+	LONG_ADDIU	t0, 1
 	LONG_S	t0,exception_count_\exception
 	.comm	exception_count\exception, 8, 8
 	.endm
@@ -455,10 +455,10 @@
 	.set	noreorder
 	/* check if TLB contains a entry for EPC */
 	MFC0	k1, CP0_ENTRYHI
-	andi	k1, 0xff	/* ASID_MASK */
+	andi	k1, MIPS_ENTRYHI_ASID | MIPS_ENTRYHI_ASIDX
 	MFC0	k0, CP0_EPC
-	PTR_SRL k0, _PAGE_SHIFT + 1
-	PTR_SLL k0, _PAGE_SHIFT + 1
+	PTR_SRL	k0, _PAGE_SHIFT + 1
+	PTR_SLL	k0, _PAGE_SHIFT + 1
 	or	k1, k0
 	MTC0	k1, CP0_ENTRYHI
 	mtc0_tlbw_hazard
@@ -478,27 +478,27 @@
 	/* microMIPS: 0x007d6b3c: rdhwr v1,$29 */
 	MFC0	k1, CP0_EPC
 #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS64_R2)
-	and     k0, k1, 1
-	beqz    k0, 1f
-	xor     k1, k0
-	lhu     k0, (k1)
-	lhu     k1, 2(k1)
-	ins     k1, k0, 16, 16
-	lui     k0, 0x007d
-	b       docheck
-	ori     k0, 0x6b3c
+	and	k0, k1, 1
+	beqz	k0, 1f
+	 xor	k1, k0
+	lhu	k0, (k1)
+	lhu	k1, 2(k1)
+	ins	k1, k0, 16, 16
+	lui	k0, 0x007d
+	b	docheck
+	 ori	k0, 0x6b3c
 1:
-	lui     k0, 0x7c03
-	lw      k1, (k1)
-	ori     k0, 0xe83b
+	lui	k0, 0x7c03
+	lw	k1, (k1)
+	ori	k0, 0xe83b
 #else
-	andi    k0, k1, 1
-	bnez    k0, handle_ri
-	lui     k0, 0x7c03
-	lw      k1, (k1)
-	ori     k0, 0xe83b
+	andi	k0, k1, 1
+	bnez	k0, handle_ri
+	 lui	k0, 0x7c03
+	lw	k1, (k1)
+	ori	k0, 0xe83b
 #endif
-	.set    reorder
+	.set	reorder
 docheck:
 	bne	k0, k1, handle_ri	/* if not ours */
 
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 4e4cc5b..56e8fed 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -21,7 +21,6 @@
 #include <asm/asmmacro.h>
 #include <asm/irqflags.h>
 #include <asm/regdef.h>
-#include <asm/pgtable-bits.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
 
@@ -132,7 +131,27 @@
 	set_saved_sp	sp, t0, t1
 	PTR_SUBU	sp, 4 * SZREG		# init stack pointer
 
+#ifdef CONFIG_RELOCATABLE
+	/* Copy kernel and apply the relocations */
+	jal		relocate_kernel
+
+	/* Repoint the sp into the new kernel image */
+	PTR_LI		sp, _THREAD_SIZE - 32 - PT_SIZE
+	PTR_ADDU	sp, $28
+	set_saved_sp	sp, t0, t1
+	PTR_SUBU	sp, 4 * SZREG		# init stack pointer
+
+	/*
+	 * relocate_kernel returns the entry point either
+	 * in the relocated kernel or the original if for
+	 * some reason relocation failed - jump there now
+	 * with instruction hazard barrier because of the
+	 * newly sync'd icache.
+	 */
+	jr.hb		v0
+#else
 	j		start_kernel
+#endif
 	END(kernel_entry)
 
 #ifdef CONFIG_SMP
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 46794d6..60ab4c4 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -181,6 +181,11 @@
 	case CPU_XLP:
 		cpu_wait = r4k_wait;
 		break;
+	case CPU_LOONGSON3:
+		if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2)
+			cpu_wait = r4k_wait;
+		break;
+
 	case CPU_BMIPS5000:
 		cpu_wait = r4k_wait_irqoff;
 		break;
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 3fff89a..625ee77 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -28,6 +28,7 @@
 #include <asm/inst.h>
 #include <asm/mips-r2-to-r6-emul.h>
 #include <asm/local.h>
+#include <asm/mipsregs.h>
 #include <asm/ptrace.h>
 #include <asm/uaccess.h>
 
@@ -1251,10 +1252,10 @@
 			"	j	10b\n"
 			"	.previous\n"
 			"	.section	__ex_table,\"a\"\n"
-			"	.word	1b,8b\n"
-			"	.word	2b,8b\n"
-			"	.word	3b,8b\n"
-			"	.word	4b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
 			"	.previous\n"
 			"	.set	pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -1326,10 +1327,10 @@
 			"	j	10b\n"
 			"       .previous\n"
 			"	.section	__ex_table,\"a\"\n"
-			"	.word	1b,8b\n"
-			"	.word	2b,8b\n"
-			"	.word	3b,8b\n"
-			"	.word	4b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
 			"	.previous\n"
 			"	.set	pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -1397,10 +1398,10 @@
 			"	j	9b\n"
 			"	.previous\n"
 			"	.section        __ex_table,\"a\"\n"
-			"	.word	1b,8b\n"
-			"	.word	2b,8b\n"
-			"	.word	3b,8b\n"
-			"	.word	4b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
 			"	.previous\n"
 			"	.set	pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -1467,10 +1468,10 @@
 			"	j	9b\n"
 			"	.previous\n"
 			"	.section        __ex_table,\"a\"\n"
-			"	.word	1b,8b\n"
-			"	.word	2b,8b\n"
-			"	.word	3b,8b\n"
-			"	.word	4b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
 			"	.previous\n"
 			"	.set	pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -1582,14 +1583,14 @@
 			"	j	9b\n"
 			"	.previous\n"
 			"	.section        __ex_table,\"a\"\n"
-			"	.word	1b,8b\n"
-			"	.word	2b,8b\n"
-			"	.word	3b,8b\n"
-			"	.word	4b,8b\n"
-			"	.word	5b,8b\n"
-			"	.word	6b,8b\n"
-			"	.word	7b,8b\n"
-			"	.word	0b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
+			STR(PTR) " 5b,8b\n"
+			STR(PTR) " 6b,8b\n"
+			STR(PTR) " 7b,8b\n"
+			STR(PTR) " 0b,8b\n"
 			"	.previous\n"
 			"	.set	pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -1701,14 +1702,14 @@
 			"	j      9b\n"
 			"	.previous\n"
 			"	.section        __ex_table,\"a\"\n"
-			"	.word  1b,8b\n"
-			"	.word  2b,8b\n"
-			"	.word  3b,8b\n"
-			"	.word  4b,8b\n"
-			"	.word  5b,8b\n"
-			"	.word  6b,8b\n"
-			"	.word  7b,8b\n"
-			"	.word  0b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
+			STR(PTR) " 5b,8b\n"
+			STR(PTR) " 6b,8b\n"
+			STR(PTR) " 7b,8b\n"
+			STR(PTR) " 0b,8b\n"
 			"	.previous\n"
 			"	.set    pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -1820,14 +1821,14 @@
 			"	j	9b\n"
 			"	.previous\n"
 			"	.section        __ex_table,\"a\"\n"
-			"	.word	1b,8b\n"
-			"	.word	2b,8b\n"
-			"	.word	3b,8b\n"
-			"	.word	4b,8b\n"
-			"	.word	5b,8b\n"
-			"	.word	6b,8b\n"
-			"	.word	7b,8b\n"
-			"	.word	0b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
+			STR(PTR) " 5b,8b\n"
+			STR(PTR) " 6b,8b\n"
+			STR(PTR) " 7b,8b\n"
+			STR(PTR) " 0b,8b\n"
 			"	.previous\n"
 			"	.set	pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -1938,14 +1939,14 @@
 			"       j	9b\n"
 			"       .previous\n"
 			"       .section        __ex_table,\"a\"\n"
-			"       .word	1b,8b\n"
-			"       .word	2b,8b\n"
-			"       .word	3b,8b\n"
-			"       .word	4b,8b\n"
-			"       .word	5b,8b\n"
-			"       .word	6b,8b\n"
-			"       .word	7b,8b\n"
-			"       .word	0b,8b\n"
+			STR(PTR) " 1b,8b\n"
+			STR(PTR) " 2b,8b\n"
+			STR(PTR) " 3b,8b\n"
+			STR(PTR) " 4b,8b\n"
+			STR(PTR) " 5b,8b\n"
+			STR(PTR) " 6b,8b\n"
+			STR(PTR) " 7b,8b\n"
+			STR(PTR) " 0b,8b\n"
 			"       .previous\n"
 			"       .set	pop\n"
 			: "+&r"(rt), "=&r"(rs),
@@ -2000,7 +2001,7 @@
 			"j	2b\n"
 			".previous\n"
 			".section        __ex_table,\"a\"\n"
-			".word  1b, 3b\n"
+			STR(PTR) " 1b,3b\n"
 			".previous\n"
 			: "=&r"(res), "+&r"(err)
 			: "r"(vaddr), "i"(SIGSEGV)
@@ -2058,7 +2059,7 @@
 			"j	2b\n"
 			".previous\n"
 			".section        __ex_table,\"a\"\n"
-			".word	1b, 3b\n"
+			STR(PTR) " 1b,3b\n"
 			".previous\n"
 			: "+&r"(res), "+&r"(err)
 			: "r"(vaddr), "i"(SIGSEGV));
@@ -2119,7 +2120,7 @@
 			"j	2b\n"
 			".previous\n"
 			".section        __ex_table,\"a\"\n"
-			".word  1b, 3b\n"
+			STR(PTR) " 1b,3b\n"
 			".previous\n"
 			: "=&r"(res), "+&r"(err)
 			: "r"(vaddr), "i"(SIGSEGV)
@@ -2182,7 +2183,7 @@
 			"j	2b\n"
 			".previous\n"
 			".section        __ex_table,\"a\"\n"
-			".word	1b, 3b\n"
+			STR(PTR) " 1b,3b\n"
 			".previous\n"
 			: "+&r"(res), "+&r"(err)
 			: "r"(vaddr), "i"(SIGSEGV));
diff --git a/arch/mips/kernel/module-rela.c b/arch/mips/kernel/module-rela.c
index 9083d63..7811688 100644
--- a/arch/mips/kernel/module-rela.c
+++ b/arch/mips/kernel/module-rela.c
@@ -16,6 +16,7 @@
  *  Copyright (C) 2001 Rusty Russell.
  *  Copyright (C) 2003, 2004 Ralf Baechle (ralf@linux-mips.org)
  *  Copyright (C) 2005 Thiemo Seufer
+ *  Copyright (C) 2015 Imagination Technologies Ltd.
  */
 
 #include <linux/elf.h>
@@ -35,15 +36,13 @@
 static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v)
 {
 	if (v % 4) {
-		pr_err("module %s: dangerous R_MIPS_26 RELArelocation\n",
+		pr_err("module %s: dangerous R_MIPS_26 RELA relocation\n",
 		       me->name);
 		return -ENOEXEC;
 	}
 
 	if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
-		printk(KERN_ERR
-		       "module %s: relocation overflow\n",
-		       me->name);
+		pr_err("module %s: relocation overflow\n", me->name);
 		return -ENOEXEC;
 	}
 
@@ -67,6 +66,48 @@
 	return 0;
 }
 
+static int apply_r_mips_pc_rela(struct module *me, u32 *location, Elf_Addr v,
+				unsigned bits)
+{
+	/* The low "bits" bits hold the offset; bit (bits - 1) is its sign */
+	const unsigned long field = GENMASK(bits - 1, 0);
+	const unsigned long sign = BIT(bits - 1);
+	long delta;
+
+	if (v % 4) {
+		pr_err("module %s: dangerous R_MIPS_PC%u RELA relocation\n",
+		       me->name, bits);
+		return -ENOEXEC;
+	}
+
+	/* Distance from the relocated word to the target, in 4-byte units */
+	delta = ((long)v - (long)location) >> 2;
+
+	/* Everything above the field must be a copy of the sign bit */
+	if ((delta & ~field) != ((delta & sign) ? ~field : 0)) {
+		pr_err("module %s: relocation overflow\n", me->name);
+		return -ENOEXEC;
+	}
+
+	*location = (*location & ~field) | (delta & field);
+	return 0;
+}
+
+static int apply_r_mips_pc16_rela(struct module *me, u32 *location, Elf_Addr v)
+{
+	return apply_r_mips_pc_rela(me, location, v, 16);	/* 16-bit offset field */
+}
+
+static int apply_r_mips_pc21_rela(struct module *me, u32 *location, Elf_Addr v)
+{
+	return apply_r_mips_pc_rela(me, location, v, 21);	/* 21-bit offset field */
+}
+
+static int apply_r_mips_pc26_rela(struct module *me, u32 *location, Elf_Addr v)
+{
+	return apply_r_mips_pc_rela(me, location, v, 26);	/* 26-bit offset field */
+}
+
 static int apply_r_mips_64_rela(struct module *me, u32 *location, Elf_Addr v)
 {
 	*(Elf_Addr *)location = v;
@@ -99,9 +140,12 @@
 	[R_MIPS_26]		= apply_r_mips_26_rela,
 	[R_MIPS_HI16]		= apply_r_mips_hi16_rela,
 	[R_MIPS_LO16]		= apply_r_mips_lo16_rela,
+	[R_MIPS_PC16]		= apply_r_mips_pc16_rela,
 	[R_MIPS_64]		= apply_r_mips_64_rela,
 	[R_MIPS_HIGHER]		= apply_r_mips_higher_rela,
-	[R_MIPS_HIGHEST]	= apply_r_mips_highest_rela
+	[R_MIPS_HIGHEST]	= apply_r_mips_highest_rela,
+	[R_MIPS_PC21_S2]	= apply_r_mips_pc21_rela,
+	[R_MIPS_PC26_S2]	= apply_r_mips_pc26_rela,
 };
 
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
@@ -126,11 +170,11 @@
 		/* This is the symbol it is referring to */
 		sym = (Elf_Sym *)sechdrs[symindex].sh_addr
 			+ ELF_MIPS_R_SYM(rel[i]);
-		if (IS_ERR_VALUE(sym->st_value)) {
+		if (sym->st_value >= -MAX_ERRNO) {
 			/* Ignore unresolved weak symbol */
 			if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
 				continue;
-			printk(KERN_WARNING "%s: Unknown symbol %s\n",
+			pr_warn("%s: Unknown symbol %s\n",
 			       me->name, strtab + sym->st_name);
 			return -ENOENT;
 		}
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index f9b2936..79850e3 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -73,8 +73,7 @@
 	}
 
 	if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
-		printk(KERN_ERR
-		       "module %s: relocation overflow\n",
+		pr_err("module %s: relocation overflow\n",
 		       me->name);
 		return -ENOEXEC;
 	}
@@ -183,13 +182,62 @@
 	return -ENOEXEC;
 }
 
+static int apply_r_mips_pc_rel(struct module *me, u32 *location, Elf_Addr v,
+			       unsigned bits)
+{
+	const unsigned long field = GENMASK(bits - 1, 0);
+	const unsigned long sign = BIT(bits - 1);
+	long delta;
+
+	if (v % 4) {
+		pr_err("module %s: dangerous R_MIPS_PC%u REL relocation\n",
+		       me->name, bits);
+		return -ENOEXEC;
+	}
+
+	/* REL format: the addend is read out of the instruction field itself */
+	delta = *location & field;
+	if (delta & sign)
+		delta |= ~field;	/* sign extend it to a full long */
+
+	/* Add the distance from the relocated word to the target, in words */
+	delta += ((long)v - (long)location) >> 2;
+
+	/* Bits above the field must all equal the sign bit, else we overflowed */
+	if ((delta & ~field) != ((delta & sign) ? ~field : 0)) {
+		pr_err("module %s: relocation overflow\n", me->name);
+		return -ENOEXEC;
+	}
+
+	*location = (*location & ~field) | (delta & field);
+	return 0;
+}
+
+static int apply_r_mips_pc16_rel(struct module *me, u32 *location, Elf_Addr v)
+{
+	return apply_r_mips_pc_rel(me, location, v, 16);	/* 16-bit offset field */
+}
+
+static int apply_r_mips_pc21_rel(struct module *me, u32 *location, Elf_Addr v)
+{
+	return apply_r_mips_pc_rel(me, location, v, 21);	/* 21-bit offset field */
+}
+
+static int apply_r_mips_pc26_rel(struct module *me, u32 *location, Elf_Addr v)
+{
+	return apply_r_mips_pc_rel(me, location, v, 26);	/* 26-bit offset field */
+}
+
 static int (*reloc_handlers_rel[]) (struct module *me, u32 *location,
 				Elf_Addr v) = {
 	[R_MIPS_NONE]		= apply_r_mips_none,
 	[R_MIPS_32]		= apply_r_mips_32_rel,
 	[R_MIPS_26]		= apply_r_mips_26_rel,
 	[R_MIPS_HI16]		= apply_r_mips_hi16_rel,
-	[R_MIPS_LO16]		= apply_r_mips_lo16_rel
+	[R_MIPS_LO16]		= apply_r_mips_lo16_rel,
+	[R_MIPS_PC16]		= apply_r_mips_pc16_rel,
+	[R_MIPS_PC21_S2]	= apply_r_mips_pc21_rel,
+	[R_MIPS_PC26_S2]	= apply_r_mips_pc26_rel,
 };
 
 int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
@@ -215,12 +263,12 @@
 		/* This is the symbol it is referring to */
 		sym = (Elf_Sym *)sechdrs[symindex].sh_addr
 			+ ELF_MIPS_R_SYM(rel[i]);
-		if (IS_ERR_VALUE(sym->st_value)) {
+		if (sym->st_value >= -MAX_ERRNO) {
 			/* Ignore unresolved weak symbol */
 			if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
 				continue;
-			printk(KERN_WARNING "%s: Unknown symbol %s\n",
-			       me->name, strtab + sym->st_name);
+			pr_warn("%s: Unknown symbol %s\n",
+				me->name, strtab + sym->st_name);
 			return -ENOENT;
 		}
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 9bc1191..d3ba9f4 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -101,8 +101,6 @@
 
 static struct mips_pmu mipspmu;
 
-#define M_CONFIG1_PC	(1 << 4)
-
 #define M_PERFCTL_EXL			(1	<<  0)
 #define M_PERFCTL_KERNEL		(1	<<  1)
 #define M_PERFCTL_SUPERVISOR		(1	<<  2)
@@ -754,7 +752,7 @@
 
 static int __n_counters(void)
 {
-	if (!(read_c0_config1() & M_CONFIG1_PC))
+	if (!cpu_has_perf)
 		return 0;
 	if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
 		return 1;
@@ -825,6 +823,16 @@
 	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x27, CNTR_ODD, T },
 };
 
+static const struct mips_perf_event i6400_event_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]          = { 0x00, CNTR_EVEN | CNTR_ODD },
+	[PERF_COUNT_HW_INSTRUCTIONS]        = { 0x01, CNTR_EVEN | CNTR_ODD },
+	/* The two cache events below only count dcache, not icache */
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = { 0x45, CNTR_EVEN | CNTR_ODD },
+	[PERF_COUNT_HW_CACHE_MISSES]        = { 0x48, CNTR_EVEN | CNTR_ODD },
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x15, CNTR_EVEN | CNTR_ODD },
+	[PERF_COUNT_HW_BRANCH_MISSES]       = { 0x16, CNTR_EVEN | CNTR_ODD },
+};
+
 static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN },
 	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD },
@@ -1015,6 +1023,46 @@
 },
 };
 
+static const struct mips_perf_event i6400_cache_map
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0x46, CNTR_EVEN | CNTR_ODD },
+		[C(RESULT_MISS)]	= { 0x49, CNTR_EVEN | CNTR_ODD },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { 0x47, CNTR_EVEN | CNTR_ODD },
+		[C(RESULT_MISS)]	= { 0x4a, CNTR_EVEN | CNTR_ODD },
+	},
+},
+[C(L1I)] = {	/* only OP_READ is mapped for the icache */
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0x84, CNTR_EVEN | CNTR_ODD },
+		[C(RESULT_MISS)]	= { 0x85, CNTR_EVEN | CNTR_ODD },
+	},
+},
+[C(DTLB)] = {
+	/* Can't distinguish read & write: both ops use the same event numbers */
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0x40, CNTR_EVEN | CNTR_ODD },
+		[C(RESULT_MISS)]	= { 0x41, CNTR_EVEN | CNTR_ODD },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { 0x40, CNTR_EVEN | CNTR_ODD },
+		[C(RESULT_MISS)]	= { 0x41, CNTR_EVEN | CNTR_ODD },
+	},
+},
+[C(BPU)] = {
+	/* Conditional branches / mispredicted (same events as the general map) */
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0x15, CNTR_EVEN | CNTR_ODD },
+		[C(RESULT_MISS)]	= { 0x16, CNTR_EVEN | CNTR_ODD },
+	},
+},
+};
+
 static const struct mips_perf_event loongson3_cache_map
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1556,6 +1604,7 @@
 #endif
 		break;
 	case CPU_P5600:
+	case CPU_P6600:
 	case CPU_I6400:
 		/* 8-bit event numbers */
 		raw_id = config & 0x1ff;
@@ -1718,11 +1767,16 @@
 		mipspmu.general_event_map = &mipsxxcore_event_map2;
 		mipspmu.cache_event_map = &mipsxxcore_cache_map2;
 		break;
-	case CPU_I6400:
-		mipspmu.name = "mips/I6400";
+	case CPU_P6600:
+		mipspmu.name = "mips/P6600";
 		mipspmu.general_event_map = &mipsxxcore_event_map2;
 		mipspmu.cache_event_map = &mipsxxcore_cache_map2;
 		break;
+	case CPU_I6400:
+		mipspmu.name = "mips/I6400";
+		mipspmu.general_event_map = &i6400_event_map;
+		mipspmu.cache_event_map = &i6400_cache_map;
+		break;
 	case CPU_1004K:
 		mipspmu.name = "mips/1004K";
 		mipspmu.general_event_map = &mipsxxcore_event_map;
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index fa3f9eb..adda3ff 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -224,11 +224,18 @@
 	uasm_build_label(pl, *pp, lbl);
 
 	/* Generate the cache ops */
-	for (i = 0; i < unroll_lines; i++)
-		uasm_i_cache(pp, op, i * cache->linesz, t0);
+	for (i = 0; i < unroll_lines; i++) {
+		if (cpu_has_mips_r6) {
+			uasm_i_cache(pp, op, 0, t0);
+			uasm_i_addiu(pp, t0, t0, cache->linesz);
+		} else {
+			uasm_i_cache(pp, op, i * cache->linesz, t0);
+		}
+	}
 
-	/* Update the base address */
-	uasm_i_addiu(pp, t0, t0, unroll_lines * cache->linesz);
+	if (!cpu_has_mips_r6)
+		/* Update the base address */
+		uasm_i_addiu(pp, t0, t0, unroll_lines * cache->linesz);
 
 	/* Loop if we haven't reached the end address yet */
 	uasm_il_bne(pp, pr, t0, t1, lbl);
diff --git a/arch/mips/kernel/pm.c b/arch/mips/kernel/pm.c
index fefdf39..dc81489 100644
--- a/arch/mips/kernel/pm.c
+++ b/arch/mips/kernel/pm.c
@@ -56,7 +56,7 @@
 		write_c0_userlocal(current_thread_info()->tp_value);
 
 	/* Restore watch registers */
-	__restore_watch();
+	__restore_watch(current);
 }
 
 /**
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 298b2b7..97dc01b 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -114,6 +114,7 @@
 	if (cpu_has_smartmips)	seq_printf(m, "%s", " smartmips");
 	if (cpu_has_dsp)	seq_printf(m, "%s", " dsp");
 	if (cpu_has_dsp2)	seq_printf(m, "%s", " dsp2");
+	if (cpu_has_dsp3)	seq_printf(m, "%s", " dsp3");
 	if (cpu_has_mipsmt)	seq_printf(m, "%s", " mt");
 	if (cpu_has_mmips)	seq_printf(m, "%s", " micromips");
 	if (cpu_has_vz)		seq_printf(m, "%s", " vz");
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 92880ce..a6b3dc5 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -77,10 +77,6 @@
 {
 }
 
-void flush_thread(void)
-{
-}
-
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	/*
@@ -455,7 +451,7 @@
 		    *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
 			regs = (struct pt_regs *)*sp;
 			pc = regs->cp0_epc;
-			if (__kernel_text_address(pc)) {
+			if (!user_mode(regs) && __kernel_text_address(pc)) {
 				*sp = regs->regs[29];
 				*ra = regs->regs[31];
 				return pc;
@@ -580,11 +576,19 @@
 	return value;
 }
 
+static void prepare_for_fp_mode_switch(void *info)
+{
+	/* info is the mm_struct handed to smp_call_function() by the caller */
+	if (current->mm != info)
+		return;		/* this CPU is running some other address space */
+	lose_fpu(1);
+}
+
 int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
 {
 	const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
-	unsigned long switch_count;
 	struct task_struct *t;
+	int max_users;
 
 	/* Check the value is valid */
 	if (value & ~known_bits)
@@ -601,6 +605,9 @@
 	if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6)
 		return -EOPNOTSUPP;
 
+	/* Proceed with the mode switch */
+	preempt_disable();
+
 	/* Save FP & vector context, then disable FPU & MSA */
 	if (task->signal == current->signal)
 		lose_fpu(1);
@@ -610,31 +617,17 @@
 	smp_mb__after_atomic();
 
 	/*
-	 * If there are multiple online CPUs then wait until all threads whose
-	 * FP mode is about to change have been context switched. This approach
-	 * allows us to only worry about whether an FP mode switch is in
-	 * progress when FP is first used in a tasks time slice. Pretty much all
-	 * of the mode switch overhead can thus be confined to cases where mode
-	 * switches are actually occurring. That is, to here. However for the
-	 * thread performing the mode switch it may take a while...
+	 * If there are multiple online CPUs then force any which are running
+	 * threads in this process to lose their FPU context, which they can't
+	 * regain until fp_mode_switching is cleared later.
 	 */
 	if (num_online_cpus() > 1) {
-		spin_lock_irq(&task->sighand->siglock);
+		/* No need to send an IPI for the local CPU */
+		max_users = (task->mm == current->mm) ? 1 : 0;
 
-		for_each_thread(task, t) {
-			if (t == current)
-				continue;
-
-			switch_count = t->nvcsw + t->nivcsw;
-
-			do {
-				spin_unlock_irq(&task->sighand->siglock);
-				cond_resched();
-				spin_lock_irq(&task->sighand->siglock);
-			} while ((t->nvcsw + t->nivcsw) == switch_count);
-		}
-
-		spin_unlock_irq(&task->sighand->siglock);
+		if (atomic_read(&current->mm->mm_users) > max_users)
+			smp_call_function(prepare_for_fp_mode_switch,
+					  (void *)current->mm, 1);
 	}
 
 	/*
@@ -659,6 +652,7 @@
 
 	/* Allow threads to use FP again */
 	atomic_set(&task->mm->context.fp_mode_switching, 0);
+	preempt_enable();
 
 	return 0;
 }
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index a5279b2..0dcf691 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -57,8 +57,7 @@
 	/* Begin with data registers set to all 1s... */
 	memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
 
-	/* ...and FCSR zeroed */
-	target->thread.fpu.fcr31 = 0;
+	/* FCSR has been preset by `mips_set_personality_nan'.  */
 
 	/*
 	 * Record that the target has "used" math, such that the context
@@ -80,6 +79,22 @@
 }
 
 /*
+ * Poke at FCSR according to its mask.  Don't set the cause bits as
+ * this is currently not handled correctly in FP context restoration
+ * and will cause an oops if a corresponding enable bit is set.
+ */
+static void ptrace_setfcr31(struct task_struct *child, u32 value)
+{
+	const u32 keep = boot_cpu_data.fpu_msk31;
+	u32 old = child->thread.fpu.fcr31;
+
+	/* Cause bits are never taken from the tracer (see the comment above) */
+	value &= ~FPU_CSR_ALL_X;
+
+	child->thread.fpu.fcr31 = (old & keep) | (value & ~keep);
+}
+
+/*
  * Read a general register set.	 We always use the 64-bit format, even
  * for 32-bit kernels and for 32-bit processes on a 64-bit kernel.
  * Registers are sign extended to fill the available space.
@@ -159,9 +174,7 @@
 {
 	union fpureg *fregs;
 	u64 fpr_val;
-	u32 fcr31;
 	u32 value;
-	u32 mask;
 	int i;
 
 	if (!access_ok(VERIFY_READ, data, 33 * 8))
@@ -176,9 +189,7 @@
 	}
 
 	__get_user(value, data + 64);
-	fcr31 = child->thread.fpu.fcr31;
-	mask = boot_cpu_data.fpu_msk31;
-	child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
+	ptrace_setfcr31(child, value);
 
 	/* FIR may not be written.  */
 
@@ -210,7 +221,8 @@
 	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
 		__put_user(child->thread.watch.mips3264.watchlo[i],
 			   &addr->WATCH_STYLE.watchlo[i]);
-		__put_user(child->thread.watch.mips3264.watchhi[i] & 0xfff,
+		__put_user(child->thread.watch.mips3264.watchhi[i] &
+				(MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW),
 			   &addr->WATCH_STYLE.watchhi[i]);
 		__put_user(boot_cpu_data.watch_reg_masks[i],
 			   &addr->WATCH_STYLE.watch_masks[i]);
@@ -252,12 +264,12 @@
 		}
 #endif
 		__get_user(ht[i], &addr->WATCH_STYLE.watchhi[i]);
-		if (ht[i] & ~0xff8)
+		if (ht[i] & ~MIPS_WATCHHI_MASK)
 			return -EINVAL;
 	}
 	/* Install them. */
 	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
-		if (lt[i] & 7)
+		if (lt[i] & MIPS_WATCHLO_IRW)
 			watch_active = 1;
 		child->thread.watch.mips3264.watchlo[i] = lt[i];
 		/* Set the G bit. */
@@ -805,7 +817,7 @@
 			break;
 #endif
 		case FPC_CSR:
-			child->thread.fpu.fcr31 = data & ~FPU_CSR_ALL_X;
+			ptrace_setfcr31(child, data);
 			break;
 		case DSP_BASE ... DSP_BASE + 5: {
 			dspreg_t *dregs;
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 17732f8..56d86b0 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -244,17 +244,17 @@
 	.set	push
 	.set	noat
 #ifdef CONFIG_64BIT
-	copy_u_d \wr, 1
+	copy_s_d \wr, 1
 	EX sd	$1, \off(\base)
 #elif defined(CONFIG_CPU_LITTLE_ENDIAN)
-	copy_u_w \wr, 2
+	copy_s_w \wr, 2
 	EX sw	$1, \off(\base)
-	copy_u_w \wr, 3
+	copy_s_w \wr, 3
 	EX sw	$1, (\off+4)(\base)
 #else /* CONFIG_CPU_BIG_ENDIAN */
-	copy_u_w \wr, 2
+	copy_s_w \wr, 2
 	EX sw	$1, (\off+4)(\base)
-	copy_u_w \wr, 3
+	copy_s_w \wr, 3
 	EX sw	$1, \off(\base)
 #endif
 	.set	pop
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 92cd051..2f0a3b2 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -15,7 +15,6 @@
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
-#include <asm/pgtable-bits.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 #include <asm/thread_info.h>
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c
new file mode 100644
index 0000000..ca1cc30
--- /dev/null
+++ b/arch/mips/kernel/relocate.c
@@ -0,0 +1,386 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Support for Kernel relocation at boot time
+ *
+ * Copyright (C) 2015, Imagination Technologies Ltd.
+ * Authors: Matt Redfearn (matt.redfearn@imgtec.com)
+ */
+#include <asm/bootinfo.h>
+#include <asm/cacheflush.h>
+#include <asm/fw/fw.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/timex.h>
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+#include <linux/of_fdt.h>
+#include <linux/sched.h>
+#include <linux/start_kernel.h>
+#include <linux/string.h>
+#include <linux/printk.h>
+
+#define RELOCATED(x) ((void *)((long)x + offset))
+
+extern u32 _relocation_start[];	/* End kernel image / start relocation table */
+extern u32 _relocation_end[];	/* End relocation table */
+
+extern long __start___ex_table;	/* Start exception table */
+extern long __stop___ex_table;	/* End exception table */
+
+static inline u32 __init get_synci_step(void)
+{
+	u32 step;
+
+	/* rdhwr of hardware register $1; sync_icache() uses it as its stride */
+	__asm__("rdhwr  %0, $1" : "=r" (step));
+	return step;
+}
+
+static void __init sync_icache(void *kbase, unsigned long kernel_length)
+{
+	void *kend = kbase + kernel_length;
+	u32 step = get_synci_step();
+
+	/* A zero step means no synci is needed, and would never advance kbase */
+	while (step && kbase < kend) {
+		__asm__ __volatile__(
+			"synci  0(%0)"
+			: /* no output */
+			: "r" (kbase));
+		kbase += step;
+	}
+
+	/* Completion barrier */
+	__sync();
+}
+
+static int __init apply_r_mips_64_rel(u32 *loc_orig, u32 *loc_new, long offset)
+{
+	*(u64 *)loc_new += offset;	/* slide the 64-bit value in the copy; loc_orig unused */
+
+	return 0;
+}
+
+static int __init apply_r_mips_32_rel(u32 *loc_orig, u32 *loc_new, long offset)
+{
+	*loc_new += offset;	/* slide the 32-bit value in the copy; loc_orig unused */
+
+	return 0;
+}
+
+static int __init apply_r_mips_26_rel(u32 *loc_orig, u32 *loc_new, long offset)
+{
+	unsigned long target_addr = (*loc_orig) & 0x03ffffff;
+
+	if (offset % 4) {
+		pr_err("Dangerous R_MIPS_26 REL relocation\n");
+		return -ENOEXEC;
+	}
+
+	/* Original target: 28-bit (index << 2) inside loc_orig's 256MB region */
+	target_addr <<= 2;
+	target_addr += (unsigned long)loc_orig & ~0x0fffffffUL;
+
+	/* Get the new target address */
+	target_addr += offset;
+
+	if ((target_addr & 0xf0000000) != ((unsigned long)loc_new & 0xf0000000)) {
+		pr_err("R_MIPS_26 REL relocation overflow\n");
+		return -ENOEXEC;
+	}
+
+	/* Back to a word index relative to loc_new's 256MB region */
+	target_addr -= (unsigned long)loc_new & ~0x0fffffffUL;
+	target_addr >>= 2;
+
+	*loc_new = (*loc_new & ~0x03ffffff) | (target_addr & 0x03ffffff);
+	return 0;
+}
+
+
+static int __init apply_r_mips_hi16_rel(u32 *loc_orig, u32 *loc_new, long offset)
+{
+	const unsigned long opcode = *loc_orig;
+	unsigned long hi;
+
+	/* The immediate is the target's high half: slide it, then take it back */
+	hi = (((opcode & 0xffff) << 16) + offset) >> 16;
+
+	*loc_new = (opcode & ~0xffff) | (hi & 0xffff);
+	return 0;
+}
+
+static int (*reloc_handlers_rel[]) (u32 *, u32 *, long) __initdata = {
+	[R_MIPS_64]		= apply_r_mips_64_rel,	/* indexed by relocation type */
+	[R_MIPS_32]		= apply_r_mips_32_rel,
+	[R_MIPS_26]		= apply_r_mips_26_rel,
+	[R_MIPS_HI16]		= apply_r_mips_hi16_rel,	/* unlisted types stay NULL */
+};
+
+int __init do_relocations(void *kbase_old, void *kbase_new, long offset)
+{
+	u32 *r;
+	u32 *loc_orig;
+	u32 *loc_new;
+	int type;
+	int res;
+
+	for (r = _relocation_start; r < _relocation_end; r++) {
+		/* Sentinel for last relocation */
+		if (*r == 0)
+			break;
+
+		/* Top byte: type (may exceed the table); low 24 bits: word offset */
+		type = (*r >> 24) & 0xff;
+		loc_orig = (void *)(kbase_old + ((*r & 0x00ffffff) << 2));
+		loc_new = RELOCATED(loc_orig);
+
+		if (type >= ARRAY_SIZE(reloc_handlers_rel) ||
+		    reloc_handlers_rel[type] == NULL) {
+			pr_err("Unhandled relocation type %d at 0x%pK\n",
+			       type, loc_orig);
+			return -ENOEXEC;
+		}
+
+		res = reloc_handlers_rel[type](loc_orig, loc_new, offset);
+		if (res)
+			return res;
+	}
+	return 0;
+}
+
+/*
+ * The exception table is filled in by the relocs tool after vmlinux is linked.
+ * It must be relocated separately since there will not be any relocation
+ * information for it filled in by the linker.
+ */
+static int __init relocate_exception_table(long offset)
+{
+	unsigned long *entry = RELOCATED(&__start___ex_table);
+	unsigned long *const limit = RELOCATED(&__stop___ex_table);
+
+	/* Walk the copied table as an array of longs and slide each one */
+	while (entry < limit) {
+		*entry += offset;
+		entry++;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_RANDOMIZE_BASE
+
+static inline __init unsigned long rotate_xor(unsigned long hash,
+					      const void *area, size_t size)
+{
+	const unsigned long *word = area;
+	size_t remaining = size / sizeof(hash);	/* a trailing partial word is ignored */
+
+	for (; remaining; remaining--, word++) {
+		/* Rotate right by 7 bits, then fold in the next word */
+		hash = (hash >> 7) | (hash << (sizeof(hash) * 8 - 7));
+		hash ^= *word;
+	}
+
+	return hash;
+}
+
+static inline __init unsigned long get_random_boot(void)
+{
+	unsigned long entropy = random_get_entropy();
+	unsigned long hash = 0;
+
+	/* Start by hashing the kernel banner string (whole words only). */
+	hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
+
+	/* Mix in the value returned by random_get_entropy() */
+	hash = rotate_xor(hash, &entropy, sizeof(entropy));
+
+#if defined(CONFIG_USE_OF)
+	/* Mix in /chosen "kaslr-seed" from the device tree if it is 8 bytes */
+	{
+		int node, len;
+		u64 *prop;
+
+		node = fdt_path_offset(initial_boot_params, "/chosen");
+		if (node >= 0) {
+			prop = fdt_getprop_w(initial_boot_params, node,
+					     "kaslr-seed", &len);
+			if (prop && (len == sizeof(u64)))
+				hash = rotate_xor(hash, prop, sizeof(*prop));
+		}
+	}
+#endif /* CONFIG_USE_OF */
+
+	return hash;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+	char *str;	/* first "nokaslr" match in the command line being checked */
+
+#if defined(CONFIG_CMDLINE_BOOL)
+	const char *builtin_cmdline = CONFIG_CMDLINE;
+
+	str = strstr(builtin_cmdline, "nokaslr");	/* only the first match is examined */
+	if (str == builtin_cmdline ||
+	    (str > builtin_cmdline && *(str - 1) == ' '))	/* at start or after a space */
+		return true;
+#endif
+	str = strstr(arcs_cmdline, "nokaslr");	/* same test on arcs_cmdline */
+	if (str == arcs_cmdline || (str > arcs_cmdline && *(str - 1) == ' '))
+		return true;
+
+	return false;
+}
+
+static inline void __init *determine_relocation_address(void)
+{
+	/* Choose a new address for the kernel; &_text itself means "stay put" */
+	unsigned long kernel_length;
+	void *dest = &_text;
+	unsigned long offset;
+
+	if (kaslr_disabled())
+		return dest;
+
+	kernel_length = (long)_end - (long)(&_text);
+
+	/* Random 64KB multiple, masked by CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1 */
+	offset = get_random_boot() << 16;
+	offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1);
+	if (offset < kernel_length)	/* would land inside the running image */
+		offset += ALIGN(kernel_length, 0x10000);
+	return RELOCATED(dest);
+}
+
+#else
+
+static inline void __init *determine_relocation_address(void)
+{
+	/*
+	 * Choose a new address for the kernel.  Without CONFIG_RANDOMIZE_BASE
+	 * the destination is hard coded for now.
+	 */
+	return (void *)0xffffffff81000000;
+}
+
+#endif
+
+static inline int __init relocation_addr_valid(void *loc_new)
+{
+	const unsigned long addr = (unsigned long)loc_new;
+
+	/* Reject a destination whose low 16 bits are not all zero */
+	if (addr & 0x0000ffff)
+		return 0;
+	/* Reject a destination that lies below the end of the original kernel */
+	if (addr < (unsigned long)&_end)
+		return 0;
+	return 1;
+}
+
+void *__init relocate_kernel(void)
+{
+	void *loc_new;
+	unsigned long kernel_length;
+	unsigned long bss_length;
+	long offset = 0;
+	int res = 1;
+	/* Default to the original entry point; also returned if relocation fails */
+	void *kernel_entry = start_kernel;
+
+	/* Get the command line */
+	fw_init_cmdline();
+#if defined(CONFIG_USE_OF)
+	/* Deal with the device tree */
+	early_init_dt_scan(plat_get_fdt());
+	if (boot_command_line[0]) {
+		/* Boot command line was passed in device tree */
+		strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+	}
+#endif /* CONFIG_USE_OF */
+
+	kernel_length = (long)(&_relocation_start) - (long)(&_text);
+	bss_length = (long)&__bss_stop - (long)&__bss_start;
+
+	loc_new = determine_relocation_address();
+
+	/* Sanity check relocation address; offset stays 0 if it is rejected */
+	if (relocation_addr_valid(loc_new))
+		offset = (unsigned long)loc_new - (unsigned long)(&_text);
+
+	/* Reset the command line now so we don't end up with a duplicate */
+	arcs_cmdline[0] = '\0';
+
+	if (offset) {
+		/* Copy the kernel (_text up to _relocation_start) to its new location */
+		memcpy(loc_new, &_text, kernel_length);
+
+		/* Perform relocations on the new kernel */
+		res = do_relocations(&_text, loc_new, offset);
+		if (res < 0)
+			goto out;
+
+		/* Sync the caches ready for execution of new kernel */
+		sync_icache(loc_new, kernel_length);
+
+		res = relocate_exception_table(offset);
+		if (res < 0)
+			goto out;
+
+		/*
+		 * The original .bss has already been cleared, and
+		 * some variables such as command line parameters
+		 * stored to it, so make a copy in the new location.
+		 */
+		memcpy(RELOCATED(&__bss_start), &__bss_start, bss_length);
+
+		/* The current thread is now within the relocated image */
+		__current_thread_info = RELOCATED(&init_thread_union);
+
+		/* Return the new kernel's entry point */
+		kernel_entry = RELOCATED(start_kernel);
+	}
+out:
+	return kernel_entry;
+}
+
+/*
+ * Show relocation information on panic.
+ */
+void show_kernel_relocation(const char *level)
+{
+	/* Distance between where _text is and VMLINUX_LOAD_ADDRESS */
+	const unsigned long delta =
+		__pa_symbol(_text) - __pa_symbol(VMLINUX_LOAD_ADDRESS);
+
+	if (!IS_ENABLED(CONFIG_RELOCATABLE) || delta == 0)
+		return;
+	printk(level);
+	pr_cont("Kernel relocated by 0x%pK\n", (void *)delta);
+	pr_cont(" .text @ 0x%pK\n", _text);
+	pr_cont(" .data @ 0x%pK\n", _sdata);
+	pr_cont(" .bss  @ 0x%pK\n", __bss_start);
+}
+
+static int kernel_location_notifier_fn(struct notifier_block *self,
+				       unsigned long v, void *p)
+{
+	show_kernel_relocation(KERN_EMERG);	/* self, v and p are not used */
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kernel_location_notifier = {
+	.notifier_call = kernel_location_notifier_fn	/* hooked onto panic_notifier_list */
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &kernel_location_notifier);
+	return 0;	/* the registration result is not checked */
+}
+__initcall(register_kernel_offset_dumper);
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index d01fe53..c8e43e0 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -35,7 +35,6 @@
 
 	lw	t1, PT_EPC(sp)		# skip syscall on return
 
-	subu	v0, v0, __NR_O32_Linux	# check syscall number
 	addiu	t1, 4			# skip to next instruction
 	sw	t1, PT_EPC(sp)
 
@@ -89,6 +88,7 @@
 	and	t0, t1
 	bnez	t0, syscall_trace_entry # -> yes
 syscall_common:
+	subu	v0, v0, __NR_O32_Linux	# check syscall number
 	sltiu	t0, v0, __NR_O32_Linux_syscalls + 1
 	beqz	t0, illegal_syscall
 
@@ -118,24 +118,23 @@
 
 syscall_trace_entry:
 	SAVE_STATIC
-	move	s0, v0
 	move	a0, sp
 
 	/*
 	 * syscall number is in v0 unless we called syscall(__NR_###)
 	 * where the real syscall number is in a0
 	 */
-	addiu	a1, v0,  __NR_O32_Linux
-	bnez	v0, 1f /* __NR_syscall at offset 0 */
+	move	a1, v0
+	subu	t2, v0,  __NR_O32_Linux
+	bnez	t2, 1f /* __NR_syscall at offset 0 */
 	lw	a1, PT_R4(sp)
 
 1:	jal	syscall_trace_enter
 
 	bltz	v0, 1f			# seccomp failed? Skip syscall
 
-	move	v0, s0			# restore syscall
-
 	RESTORE_STATIC
+	lw	v0, PT_R2(sp)		# Restore syscall (maybe modified)
 	lw	a0, PT_R4(sp)		# Restore argument registers
 	lw	a1, PT_R5(sp)
 	lw	a2, PT_R6(sp)
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 6b73ecc..e6ede12 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -82,15 +82,14 @@
 
 syscall_trace_entry:
 	SAVE_STATIC
-	move	s0, v0
 	move	a0, sp
 	move	a1, v0
 	jal	syscall_trace_enter
 
 	bltz	v0, 1f			# seccomp failed? Skip syscall
 
-	move	v0, s0
 	RESTORE_STATIC
+	ld	v0, PT_R2(sp)		# Restore syscall (maybe modified)
 	ld	a0, PT_R4(sp)		# Restore argument registers
 	ld	a1, PT_R5(sp)
 	ld	a2, PT_R6(sp)
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 71f99d5..9c0b387 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -42,9 +42,6 @@
 #endif
 	beqz	t0, not_n32_scall
 
-	dsll	t0, v0, 3		# offset into table
-	ld	t2, (sysn32_call_table - (__NR_N32_Linux * 8))(t0)
-
 	sd	a3, PT_R26(sp)		# save a3 for syscall restarting
 
 	li	t1, _TIF_WORK_SYSCALL_ENTRY
@@ -53,6 +50,9 @@
 	bnez	t0, n32_syscall_trace_entry
 
 syscall_common:
+	dsll	t0, v0, 3		# offset into table
+	ld	t2, (sysn32_call_table - (__NR_N32_Linux * 8))(t0)
+
 	jalr	t2			# Do The Real Thing (TM)
 
 	li	t0, -EMAXERRNO - 1	# error?
@@ -71,21 +71,25 @@
 
 n32_syscall_trace_entry:
 	SAVE_STATIC
-	move	s0, t2
 	move	a0, sp
 	move	a1, v0
 	jal	syscall_trace_enter
 
 	bltz	v0, 1f			# seccomp failed? Skip syscall
 
-	move	t2, s0
 	RESTORE_STATIC
+	ld	v0, PT_R2(sp)		# Restore syscall (maybe modified)
 	ld	a0, PT_R4(sp)		# Restore argument registers
 	ld	a1, PT_R5(sp)
 	ld	a2, PT_R6(sp)
 	ld	a3, PT_R7(sp)
 	ld	a4, PT_R8(sp)
 	ld	a5, PT_R9(sp)
+
+	dsubu	t2, v0, __NR_N32_Linux	# check (new) syscall number
+	sltiu   t0, t2, __NR_N32_Linux_syscalls + 1
+	beqz	t0, not_n32_scall
+
 	j	syscall_common
 
 1:	j	syscall_exit
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 91b43ee..f4f28b1 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -52,9 +52,6 @@
 	sll	a2, a2, 0
 	sll	a3, a3, 0
 
-	dsll	t0, v0, 3		# offset into table
-	ld	t2, (sys32_call_table - (__NR_O32_Linux * 8))(t0)
-
 	sd	a3, PT_R26(sp)		# save a3 for syscall restarting
 
 	/*
@@ -88,6 +85,9 @@
 	bnez	t0, trace_a_syscall
 
 syscall_common:
+	dsll	t0, v0, 3		# offset into table
+	ld	t2, (sys32_call_table - (__NR_O32_Linux * 8))(t0)
+
 	jalr	t2			# Do The Real Thing (TM)
 
 	li	t0, -EMAXERRNO - 1	# error?
@@ -112,7 +112,6 @@
 	sd	a6, PT_R10(sp)
 	sd	a7, PT_R11(sp)		# For indirect syscalls
 
-	move	s0, t2			# Save syscall pointer
 	move	a0, sp
 	/*
 	 * absolute syscall number is in v0 unless we called syscall(__NR_###)
@@ -133,8 +132,8 @@
 
 	bltz	v0, 1f			# seccomp failed? Skip syscall
 
-	move	t2, s0
 	RESTORE_STATIC
+	ld	v0, PT_R2(sp)		# Restore syscall (maybe modified)
 	ld	a0, PT_R4(sp)		# Restore argument registers
 	ld	a1, PT_R5(sp)
 	ld	a2, PT_R6(sp)
@@ -143,6 +142,11 @@
 	ld	a5, PT_R9(sp)
 	ld	a6, PT_R10(sp)
 	ld	a7, PT_R11(sp)		# For indirect syscalls
+
+	dsubu	t0, v0, __NR_O32_Linux	# check (new) syscall number
+	sltiu	t0, t0, __NR_O32_Linux_syscalls + 1
+	beqz	t0, not_o32_scall
+
 	j	syscall_common
 
 1:	j	syscall_exit
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 4f60734..ef408a0 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -26,6 +26,7 @@
 #include <linux/sizes.h>
 #include <linux/device.h>
 #include <linux/dma-contiguous.h>
+#include <linux/decompress/generic.h>
 
 #include <asm/addrspace.h>
 #include <asm/bootinfo.h>
@@ -52,13 +53,6 @@
 #endif
 
 /*
- * Despite it's name this variable is even if we don't have PCI
- */
-unsigned int PCI_DMA_BUS_IS_PHYS;
-
-EXPORT_SYMBOL(PCI_DMA_BUS_IS_PHYS);
-
-/*
  * Setup information
  *
  * These are initialized so they are in the .data section
@@ -250,6 +244,35 @@
 	return 0;
 }
 
+/* In some conditions (e.g. big endian bootloader with a little endian
+   kernel), the initrd might appear byte swapped.  Detect this from its
+   first 8 bytes and, if so, swab64 the image in place (Octeon only).  */
+static void __init maybe_bswap_initrd(void)
+{
+#if defined(CONFIG_CPU_CAVIUM_OCTEON)
+	u64 buf;
+
+	/* "070701" CPIO magic is readable as-is: no swap needed */
+	if (!memcmp((void *)initrd_start, "070701", 6))
+		return;
+
+	/* A compression method matches the header: no swap needed */
+	if (decompress_method((unsigned char *)initrd_start, 8, NULL))
+		return;
+
+	/* Repeat both checks on a byte swapped copy of the first 8 bytes */
+	buf = swab64p((u64 *)initrd_start);
+	if (!memcmp(&buf, "070701", 6) ||
+	    decompress_method((unsigned char *)(&buf), 8, NULL)) {
+		unsigned long i;
+
+		pr_info("Byteswapped initrd detected\n");
+		for (i = initrd_start; i < ALIGN(initrd_end, 8); i += 8)
+			swab64s((u64 *)i);
+	}
+#endif
+}
+
 static void __init finalize_initrd(void)
 {
 	unsigned long size = initrd_end - initrd_start;
@@ -263,6 +286,8 @@
 		goto disable;
 	}
 
+	maybe_bswap_initrd();
+
 	reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT);
 	initrd_below_start_ok = 1;
 
@@ -469,6 +494,29 @@
 	 */
 	reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);
 
+#ifdef CONFIG_RELOCATABLE
+	/*
+	 * The kernel reserves all memory below its _end symbol as bootmem,
+	 * but the kernel may now be at a much higher address. The memory
+	 * between the original and new locations may be returned to the system.
+	 */
+	if (__pa_symbol(_text) > __pa_symbol(VMLINUX_LOAD_ADDRESS)) {
+		unsigned long offset;
+		extern void show_kernel_relocation(const char *level);
+
+		offset = __pa_symbol(_text) - __pa_symbol(VMLINUX_LOAD_ADDRESS);
+		free_bootmem(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset);
+
+#if defined(CONFIG_DEBUG_KERNEL) && defined(CONFIG_DEBUG_INFO)
+		/*
+		 * This information is necessary when debugging the kernel
+		 * But is a security vulnerability otherwise!
+		 */
+		show_kernel_relocation(KERN_INFO);
+#endif
+	}
+#endif
+
 	/*
 	 * Reserve initrd memory if needed.
 	 */
@@ -624,6 +672,8 @@
 #define USE_PROM_CMDLINE	IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
 #define USE_DTB_CMDLINE		IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
 #define EXTEND_WITH_PROM	IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
+#define BUILTIN_EXTEND_WITH_PROM	\
+	IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)
 
 static void __init arch_mem_init(char **cmdline_p)
 {
@@ -657,15 +707,23 @@
 		strlcpy(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
 
 	if (EXTEND_WITH_PROM && arcs_cmdline[0]) {
-		strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+		if (boot_command_line[0])
+			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
 		strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
 	}
 
 #if defined(CONFIG_CMDLINE_BOOL)
 	if (builtin_cmdline[0]) {
-		strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+		if (boot_command_line[0])
+			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
 		strlcat(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
 	}
+
+	if (BUILTIN_EXTEND_WITH_PROM && arcs_cmdline[0]) {
+		if (boot_command_line[0])
+			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+		strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
+	}
 #endif
 #endif
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
@@ -706,6 +764,9 @@
 	for_each_memblock(reserved, reg)
 		if (reg->size != 0)
 			reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
+
+	reserve_bootmem_region(__pa_symbol(&__nosave_begin),
+			__pa_symbol(&__nosave_end)); /* Reserve for hibernation */
 }
 
 static void __init resource_init(void)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index bf792e2..ab04229 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -195,6 +195,9 @@
 	unsigned int csr;
 	int i, err;
 
+	if (!config_enabled(CONFIG_CPU_HAS_MSA))
+		return SIGSYS;
+
 	if (size != sizeof(*msa))
 		return -EINVAL;
 
@@ -398,8 +401,8 @@
 	}
 
 fp_done:
-	if (used & USED_EXTCONTEXT)
-		err |= restore_extcontext(sc_to_extcontext(sc));
+	if (!err && (used & USED_EXTCONTEXT))
+		err = restore_extcontext(sc_to_extcontext(sc));
 
 	return err ?: sig;
 }
@@ -798,7 +801,7 @@
 		regs->regs[0] = 0;		/* Don't deal with this again.	*/
 	}
 
-	if (sig_uses_siginfo(&ksig->ka))
+	if (sig_uses_siginfo(&ksig->ka, abi))
 		ret = abi->setup_rt_frame(vdso + abi->vdso->off_rt_sigreturn,
 					  ksig, regs, oldset);
 	else
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 4909639..78c8349 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -227,6 +227,12 @@
 			err |= __put_user(from->si_uid, &to->si_uid);
 			err |= __put_user(from->si_int, &to->si_int);
 			break;
+		case __SI_SYS >> 16:
+			err |= __copy_to_user(&to->si_call_addr, &from->si_call_addr,
+					      sizeof(compat_uptr_t));
+			err |= __put_user(from->si_syscall, &to->si_syscall);
+			err |= __put_user(from->si_arch, &to->si_arch);
+			break;
 		}
 	}
 	return err;
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 78cf8c2..e02addc 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -243,6 +243,7 @@
 		break;
 	case CPU_BMIPS5000:
 		write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0));
+		current_cpu_data.core = (read_c0_brcm_config() >> 25) & 3;
 		break;
 	}
 }
@@ -565,3 +566,98 @@
 	 * once the wired entries are present.
 	 */
 }
+
+/*
+ * bmips_cpu_setup() - apply the CPU-type specific BMIPS core configuration.
+ *
+ * Switches on current_cpu_type().  BMIPS3300 and BMIPS4380 additionally
+ * touch memory-mapped registers, always addressed as an offset from the
+ * base returned by BMIPS_GET_CBR() and read back after each write.  CPU
+ * types without a case below are left untouched.
+ */
+void __init bmips_cpu_setup(void)
+{
+	void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
+	u32 __maybe_unused cfg;
+
+	switch (current_cpu_type()) {
+	case CPU_BMIPS3300:
+		/* Set BIU to async mode */
+		set_c0_brcm_bus_pll(BIT(22));
+		__sync();
+
+		/* put the BIU back in sync mode */
+		clear_c0_brcm_bus_pll(BIT(22));
+
+		/* clear BHTD to enable branch history table */
+		clear_c0_brcm_reset(BIT(16));
+
+		/* Flush and enable RAC */
+		cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
+		__raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
+		__raw_readl(cbr + BMIPS_RAC_CONFIG);
+
+		cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
+		__raw_writel(cfg | 0xf, cbr + BMIPS_RAC_CONFIG);
+		__raw_readl(cbr + BMIPS_RAC_CONFIG);
+
+		cfg = __raw_readl(cbr + BMIPS_RAC_ADDRESS_RANGE);
+		__raw_writel(cfg | 0x0fff0000, cbr + BMIPS_RAC_ADDRESS_RANGE);
+		__raw_readl(cbr + BMIPS_RAC_ADDRESS_RANGE);
+		break;
+
+	case CPU_BMIPS4380:
+		/* CBG workaround for early BMIPS4380 CPUs */
+		switch (read_c0_prid()) {
+		case 0x2a040:
+		case 0x2a042:
+		case 0x2a044:
+		case 0x2a060:
+			cfg = __raw_readl(cbr + BMIPS_L2_CONFIG);
+			__raw_writel(cfg & ~0x07000000, cbr + BMIPS_L2_CONFIG);
+			__raw_readl(cbr + BMIPS_L2_CONFIG);
+		}
+
+		/* clear BHTD to enable branch history table */
+		clear_c0_brcm_config_0(BIT(21));
+
+		/* XI/ROTR enable */
+		set_c0_brcm_config_0(BIT(23));
+		set_c0_brcm_cmt_ctrl(BIT(15));
+		break;
+
+	case CPU_BMIPS5000:
+		/* enable RDHWR, BRDHWR */
+		set_c0_brcm_config(BIT(17) | BIT(21));
+
+		/* Disable JTB */
+		__asm__ __volatile__(
+		"	.set	noreorder\n"
+		"	li	$8, 0x5a455048\n"
+		"	.word	0x4088b00f\n"	/* mtc0	t0, $22, 15 */
+		"	.word	0x4008b008\n"	/* mfc0	t0, $22, 8 */
+		"	li	$9, 0x00008000\n"
+		"	or	$8, $8, $9\n"
+		"	.word	0x4088b008\n"	/* mtc0	t0, $22, 8 */
+		"	sync\n"
+		"	li	$8, 0x0\n"
+		"	.word	0x4088b00f\n"	/* mtc0	t0, $22, 15 */
+		"	.set	reorder\n"
+		: : : "$8", "$9");
+
+		/* XI enable */
+		set_c0_brcm_config(BIT(27));
+
+		/* enable MIPS32R2 ROR instruction for XI TLB handlers */
+		__asm__ __volatile__(
+		"	li	$8, 0x5a455048\n"
+		"	.word	0x4088b00f\n"	/* mtc0 $8, $22, 15 */
+		"	nop; nop; nop\n"
+		"	.word	0x4008b008\n"	/* mfc0 $8, $22, 8 */
+		"	lui	$9, 0x0100\n"
+		"	or	$8, $9\n"
+		"	.word	0x4088b008\n"	/* mtc0 $8, $22, 8 */
+		: : : "$8", "$9");
+		break;
+	}
+}
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 253e140..1061bd2 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -27,15 +27,27 @@
 #include <asm/time.h>
 #include <asm/uasm.h>
 
+static bool threads_disabled;
 static DECLARE_BITMAP(core_power, NR_CPUS);
 
 struct core_boot_config *mips_cps_core_bootcfg;
 
+static int __init setup_nothreads(char *s)
+{
+	threads_disabled = true;	/* core_vpe_count() will return 1 */
+	return 0;
+}
+early_param("nothreads", setup_nothreads);
+
 static unsigned core_vpe_count(unsigned core)
 {
 	unsigned cfg;
 
-	if (!config_enabled(CONFIG_MIPS_MT_SMP) || !cpu_has_mipsmt)
+	if (threads_disabled)
+		return 1;
+
+	if ((!config_enabled(CONFIG_MIPS_MT_SMP) || !cpu_has_mipsmt)
+		&& (!config_enabled(CONFIG_CPU_MIPSR6) || !cpu_has_vp))
 		return 1;
 
 	mips_cm_lock_other(core, 0);
@@ -47,11 +59,12 @@
 static void __init cps_smp_setup(void)
 {
 	unsigned int ncores, nvpes, core_vpes;
+	unsigned long core_entry;
 	int c, v;
 
 	/* Detect & record VPE topology */
 	ncores = mips_cm_numcores();
-	pr_info("VPE topology ");
+	pr_info("%s topology ", cpu_has_mips_r6 ? "VP" : "VPE");
 	for (c = nvpes = 0; c < ncores; c++) {
 		core_vpes = core_vpe_count(c);
 		pr_cont("%c%u", c ? ',' : '{', core_vpes);
@@ -62,7 +75,7 @@
 
 		for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
 			cpu_data[nvpes + v].core = c;
-#ifdef CONFIG_MIPS_MT_SMP
+#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_CPU_MIPSR6)
 			cpu_data[nvpes + v].vpe_id = v;
 #endif
 		}
@@ -91,6 +104,11 @@
 	/* Make core 0 coherent with everything */
 	write_gcr_cl_coherence(0xff);
 
+	if (mips_cm_revision() >= CM_REV_CM3) {
+		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
+		write_gcr_bev_base(core_entry);
+	}
+
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* If we have an FPU, enroll ourselves in the FPU-full mask */
 	if (cpu_has_fpu)
@@ -213,6 +231,18 @@
 	if (mips_cpc_present()) {
 		/* Reset the core */
 		mips_cpc_lock_other(core);
+
+		if (mips_cm_revision() >= CM_REV_CM3) {
+			/* Run VP0 following the reset */
+			write_cpc_co_vp_run(0x1);
+
+			/*
+			 * Ensure that the VP_RUN register is written before the
+			 * core leaves reset.
+			 */
+			wmb();
+		}
+
 		write_cpc_co_cmd(CPC_Cx_CMD_RESET);
 
 		timeout = 100;
@@ -250,7 +280,10 @@
 
 static void remote_vpe_boot(void *dummy)
 {
-	mips_cps_boot_vpes();
+	unsigned core = current_cpu_data.core;
+	struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
+
+	mips_cps_boot_vpes(core_cfg, cpu_vpe_id(&current_cpu_data));
 }
 
 static void cps_boot_secondary(int cpu, struct task_struct *idle)
@@ -259,6 +292,7 @@
 	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
 	struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
 	struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id];
+	unsigned long core_entry;
 	unsigned int remote;
 	int err;
 
@@ -276,6 +310,13 @@
 		goto out;
 	}
 
+	if (cpu_has_vp) {
+		mips_cm_lock_other(core, vpe_id);
+		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
+		write_gcr_co_reset_base(core_entry);
+		mips_cm_unlock_other();
+	}
+
 	if (core != current_cpu_data.core) {
 		/* Boot a VPE on another powered up core */
 		for (remote = 0; remote < NR_CPUS; remote++) {
@@ -293,10 +334,10 @@
 		goto out;
 	}
 
-	BUG_ON(!cpu_has_mipsmt);
+	BUG_ON(!cpu_has_mipsmt && !cpu_has_vp);
 
 	/* Boot a VPE on this core */
-	mips_cps_boot_vpes();
+	mips_cps_boot_vpes(core_cfg, vpe_id);
 out:
 	preempt_enable();
 }
@@ -307,6 +348,17 @@
 	if (cpu_has_mipsmt)
 		dmt();
 
+	if (mips_cm_revision() >= CM_REV_CM3) {
+		unsigned ident = gic_read_local_vp_id();
+
+		/*
+		 * Ensure that our calculation of the VP ID matches up with
+		 * what the GIC reports, otherwise we'll have configured
+		 * interrupts incorrectly.
+		 */
+		BUG_ON(ident != mips_cm_vp_id(smp_processor_id()));
+	}
+
 	change_c0_status(ST0_IM, STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP4 |
 				 STATUSF_IP5 | STATUSF_IP6 | STATUSF_IP7);
 }
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 27cb638..f9d01e9 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -243,18 +243,6 @@
 	struct irq_domain *ipidomain;
 	struct device_node *node;
 
-	/*
-	 * In some cases like qemu-malta, it is desired to try SMP with
-	 * a single core. Qemu-malta has no GIC, so an attempt to set any IPIs
-	 * would cause a BUG_ON() to be triggered since there's no ipidomain.
-	 *
-	 * Since for a single core system IPIs aren't required really, skip the
-	 * initialisation which should generally keep any such configurations
-	 * happy and only fail hard when trying to truely run SMP.
-	 */
-	if (cpumask_weight(cpu_possible_mask) == 1)
-		return 0;
-
 	node = of_irq_find_parent(of_root);
 	ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
 
@@ -266,7 +254,17 @@
 	if (node && !ipidomain)
 		ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
 
-	BUG_ON(!ipidomain);
+	/*
+	 * There are systems which only use IPI domains some of the time,
+	 * depending upon configuration we don't know until runtime. An
+	 * example is Malta where we may compile in support for GIC & the
+	 * MT ASE, but run on a system which has multiple VPEs in a single
+	 * core and doesn't include a GIC. Until all IPI implementations
+	 * have been converted to use IPI domains the best we can do here
+	 * is to return & hope some other code sets up the IPIs.
+	 */
+	if (!ipidomain)
+		return 0;
 
 	call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
 	BUG_ON(!call_virq);
diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c
index 8489c88..d6e6cf7 100644
--- a/arch/mips/kernel/spram.c
+++ b/arch/mips/kernel/spram.c
@@ -210,6 +210,7 @@
 	case CPU_P5600:
 	case CPU_QEMU_GENERIC:
 	case CPU_I6400:
+	case CPU_P6600:
 		config0 = read_c0_config();
 		/* FIXME: addresses are Malta specific */
 		if (config0 & (1<<24)) {
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ae0c89d..4a1712b 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -145,7 +145,7 @@
 	if (!task)
 		task = current;
 
-	if (raw_show_trace || !__kernel_text_address(pc)) {
+	if (raw_show_trace || user_mode(regs) || !__kernel_text_address(pc)) {
 		show_raw_backtrace(sp);
 		return;
 	}
@@ -399,11 +399,8 @@
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 
-	if (panic_on_oops) {
-		printk(KERN_EMERG "Fatal exception: panic in 5 seconds");
-		ssleep(5);
+	if (panic_on_oops)
 		panic("Fatal exception");
-	}
 
 	if (regs && kexec_should_crash(current))
 		crash_kexec(regs);
@@ -1249,7 +1246,7 @@
 		err = init_fpu();
 		if (msa && !err) {
 			enable_msa();
-			_init_msa_upper();
+			init_msa_upper();
 			set_thread_flag(TIF_USEDMSA);
 			set_thread_flag(TIF_MSA_CTX_LIVE);
 		}
@@ -1312,7 +1309,7 @@
 	 */
 	prior_msa = test_and_set_thread_flag(TIF_MSA_CTX_LIVE);
 	if (!prior_msa && was_fpu_owner) {
-		_init_msa_upper();
+		init_msa_upper();
 
 		goto out;
 	}
@@ -1329,7 +1326,7 @@
 		 * of each vector register such that it cannot see data left
 		 * behind by another task.
 		 */
-		_init_msa_upper();
+		init_msa_upper();
 	} else {
 		/* We need to restore the vector context. */
 		restore_msa(current);
@@ -1356,7 +1353,6 @@
 	unsigned long fcr31;
 	unsigned int cpid;
 	int status, err;
-	unsigned long __maybe_unused flags;
 	int sig;
 
 	prev_state = exception_enter();
@@ -1501,16 +1497,13 @@
 {
 	siginfo_t info = { .si_signo = SIGTRAP, .si_code = TRAP_HWBKPT };
 	enum ctx_state prev_state;
-	u32 cause;
 
 	prev_state = exception_enter();
 	/*
 	 * Clear WP (bit 22) bit of cause register so we don't loop
 	 * forever.
 	 */
-	cause = read_c0_cause();
-	cause &= ~(1 << 22);
-	write_c0_cause(cause);
+	clear_c0_cause(CAUSEF_WP);
 
 	/*
 	 * If the current thread has the watch registers loaded, save
@@ -1647,6 +1640,7 @@
 	case CPU_P5600:
 	case CPU_QEMU_GENERIC:
 	case CPU_I6400:
+	case CPU_P6600:
 		{
 #define ERRCTL_PE	0x80000000
 #define ERRCTL_L2P	0x00800000
@@ -1777,7 +1771,8 @@
 
 	/* For the moment, report the problem and hang. */
 	if ((cpu_has_mips_r2_r6) &&
-	    ((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_MIPS)) {
+	    (((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_MIPS) ||
+	    ((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_LOONGSON))) {
 		pr_err("FTLB error exception, cp0_ecc=0x%08x:\n",
 		       read_c0_ecc());
 		pr_err("cp0_errorepc == %0*lx\n", field, read_c0_errorepc());
@@ -2119,6 +2114,13 @@
 	 *  o read IntCtl.IPFDC to determine the fast debug channel interrupt
 	 */
 	if (cpu_has_mips_r2_r6) {
+		/*
+		 * We shouldn't trust a secondary core has a sane EBASE register
+		 * so use the one calculated by the boot CPU.
+		 */
+		if (!is_boot_cpu)
+			write_c0_ebase(ebase);
+
 		cp0_compare_irq_shift = CAUSEB_TI - CAUSEB_IP;
 		cp0_compare_irq = (read_c0_intctl() >> INTCTLB_IPTI) & 7;
 		cp0_perfcount_irq = (read_c0_intctl() >> INTCTLB_IPPCI) & 7;
@@ -2134,7 +2136,7 @@
 	}
 
 	if (!cpu_data[cpu].asid_cache)
-		cpu_data[cpu].asid_cache = ASID_FIRST_VERSION;
+		cpu_data[cpu].asid_cache = asid_first_version(cpu);
 
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 5c62065..28b3af7 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -1191,6 +1191,7 @@
 	case ldc1_op:
 	case swc1_op:
 	case sdc1_op:
+	case cop1x_op:
 		die_if_kernel("Unaligned FP access in kernel code", regs);
 		BUG_ON(!used_math());
 
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 54d653e..a82c178 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -136,6 +136,27 @@
 #ifdef CONFIG_SMP
 	PERCPU_SECTION(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
 #endif
+
+#ifdef CONFIG_RELOCATABLE
+	. = ALIGN(4);
+
+	.data.reloc : {
+		_relocation_start = .;
+		/*
+		 * Space for relocation table
+		 * This needs to be filled so that the
+		 * relocs tool can overwrite the content.
+		 * An invalid value is left at the start of the
+		 * section to abort relocation if the table
+		 * has not been filled in.
+		 */
+		LONG(0xFFFFFFFF);
+		FILL(0);
+		. += CONFIG_RELOCATION_TABLE_SIZE - 4;
+		_relocation_end = .;
+	}
+#endif
+
 #ifdef CONFIG_MIPS_RAW_APPENDED_DTB
 	__appended_dtb = .;
 	/* leave space for appended DTB */
diff --git a/arch/mips/kernel/watch.c b/arch/mips/kernel/watch.c
index 2a03abb..19fcab7 100644
--- a/arch/mips/kernel/watch.c
+++ b/arch/mips/kernel/watch.c
@@ -15,10 +15,9 @@
  * Install the watch registers for the current thread.	A maximum of
  * four registers are installed although the machine may have more.
  */
-void mips_install_watch_registers(void)
+void mips_install_watch_registers(struct task_struct *t)
 {
-	struct mips3264_watch_reg_state *watches =
-		&current->thread.watch.mips3264;
+	struct mips3264_watch_reg_state *watches = &t->thread.watch.mips3264;
 	switch (current_cpu_data.watch_reg_use_cnt) {
 	default:
 		BUG();
@@ -26,16 +25,20 @@
 		write_c0_watchlo3(watches->watchlo[3]);
 		/* Write 1 to the I, R, and W bits to clear them, and
 		   1 to G so all ASIDs are trapped. */
-		write_c0_watchhi3(0x40000007 | watches->watchhi[3]);
+		write_c0_watchhi3(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
+				  watches->watchhi[3]);
 	case 3:
 		write_c0_watchlo2(watches->watchlo[2]);
-		write_c0_watchhi2(0x40000007 | watches->watchhi[2]);
+		write_c0_watchhi2(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
+				  watches->watchhi[2]);
 	case 2:
 		write_c0_watchlo1(watches->watchlo[1]);
-		write_c0_watchhi1(0x40000007 | watches->watchhi[1]);
+		write_c0_watchhi1(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
+				  watches->watchhi[1]);
 	case 1:
 		write_c0_watchlo0(watches->watchlo[0]);
-		write_c0_watchhi0(0x40000007 | watches->watchhi[0]);
+		write_c0_watchhi0(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
+				  watches->watchhi[0]);
 	}
 }
 
@@ -52,22 +55,26 @@
 	default:
 		BUG();
 	case 4:
-		watches->watchhi[3] = (read_c0_watchhi3() & 0x0fff);
+		watches->watchhi[3] = (read_c0_watchhi3() &
+				       (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
 	case 3:
-		watches->watchhi[2] = (read_c0_watchhi2() & 0x0fff);
+		watches->watchhi[2] = (read_c0_watchhi2() &
+				       (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
 	case 2:
-		watches->watchhi[1] = (read_c0_watchhi1() & 0x0fff);
+		watches->watchhi[1] = (read_c0_watchhi1() &
+				       (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
 	case 1:
-		watches->watchhi[0] = (read_c0_watchhi0() & 0x0fff);
+		watches->watchhi[0] = (read_c0_watchhi0() &
+				       (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
 	}
 	if (current_cpu_data.watch_reg_use_cnt == 1 &&
-	    (watches->watchhi[0] & 7) == 0) {
+	    (watches->watchhi[0] & MIPS_WATCHHI_IRW) == 0) {
 		/* Pathological case of release 1 architecture that
 		 * doesn't set the condition bits.  We assume that
 		 * since we got here, the watch condition was met and
 		 * signal that the conditions requested in watchlo
 		 * were met.  */
-		watches->watchhi[0] |= (watches->watchlo[0] & 7);
+		watches->watchhi[0] |= (watches->watchlo[0] & MIPS_WATCHHI_IRW);
 	}
  }
 
@@ -110,86 +117,86 @@
 	 * Check which of the I,R and W bits are supported, then
 	 * disable the register.
 	 */
-	write_c0_watchlo0(7);
+	write_c0_watchlo0(MIPS_WATCHLO_IRW);
 	back_to_back_c0_hazard();
 	t = read_c0_watchlo0();
 	write_c0_watchlo0(0);
-	c->watch_reg_masks[0] = t & 7;
+	c->watch_reg_masks[0] = t & MIPS_WATCHLO_IRW;
 
 	/* Write the mask bits and read them back to determine which
 	 * can be used. */
 	c->watch_reg_count = 1;
 	c->watch_reg_use_cnt = 1;
 	t = read_c0_watchhi0();
-	write_c0_watchhi0(t | 0xff8);
+	write_c0_watchhi0(t | MIPS_WATCHHI_MASK);
 	back_to_back_c0_hazard();
 	t = read_c0_watchhi0();
-	c->watch_reg_masks[0] |= (t & 0xff8);
-	if ((t & 0x80000000) == 0)
+	c->watch_reg_masks[0] |= (t & MIPS_WATCHHI_MASK);
+	if ((t & MIPS_WATCHHI_M) == 0)
 		return;
 
-	write_c0_watchlo1(7);
+	write_c0_watchlo1(MIPS_WATCHLO_IRW);
 	back_to_back_c0_hazard();
 	t = read_c0_watchlo1();
 	write_c0_watchlo1(0);
-	c->watch_reg_masks[1] = t & 7;
+	c->watch_reg_masks[1] = t & MIPS_WATCHLO_IRW;
 
 	c->watch_reg_count = 2;
 	c->watch_reg_use_cnt = 2;
 	t = read_c0_watchhi1();
-	write_c0_watchhi1(t | 0xff8);
+	write_c0_watchhi1(t | MIPS_WATCHHI_MASK);
 	back_to_back_c0_hazard();
 	t = read_c0_watchhi1();
-	c->watch_reg_masks[1] |= (t & 0xff8);
-	if ((t & 0x80000000) == 0)
+	c->watch_reg_masks[1] |= (t & MIPS_WATCHHI_MASK);
+	if ((t & MIPS_WATCHHI_M) == 0)
 		return;
 
-	write_c0_watchlo2(7);
+	write_c0_watchlo2(MIPS_WATCHLO_IRW);
 	back_to_back_c0_hazard();
 	t = read_c0_watchlo2();
 	write_c0_watchlo2(0);
-	c->watch_reg_masks[2] = t & 7;
+	c->watch_reg_masks[2] = t & MIPS_WATCHLO_IRW;
 
 	c->watch_reg_count = 3;
 	c->watch_reg_use_cnt = 3;
 	t = read_c0_watchhi2();
-	write_c0_watchhi2(t | 0xff8);
+	write_c0_watchhi2(t | MIPS_WATCHHI_MASK);
 	back_to_back_c0_hazard();
 	t = read_c0_watchhi2();
-	c->watch_reg_masks[2] |= (t & 0xff8);
-	if ((t & 0x80000000) == 0)
+	c->watch_reg_masks[2] |= (t & MIPS_WATCHHI_MASK);
+	if ((t & MIPS_WATCHHI_M) == 0)
 		return;
 
-	write_c0_watchlo3(7);
+	write_c0_watchlo3(MIPS_WATCHLO_IRW);
 	back_to_back_c0_hazard();
 	t = read_c0_watchlo3();
 	write_c0_watchlo3(0);
-	c->watch_reg_masks[3] = t & 7;
+	c->watch_reg_masks[3] = t & MIPS_WATCHLO_IRW;
 
 	c->watch_reg_count = 4;
 	c->watch_reg_use_cnt = 4;
 	t = read_c0_watchhi3();
-	write_c0_watchhi3(t | 0xff8);
+	write_c0_watchhi3(t | MIPS_WATCHHI_MASK);
 	back_to_back_c0_hazard();
 	t = read_c0_watchhi3();
-	c->watch_reg_masks[3] |= (t & 0xff8);
-	if ((t & 0x80000000) == 0)
+	c->watch_reg_masks[3] |= (t & MIPS_WATCHHI_MASK);
+	if ((t & MIPS_WATCHHI_M) == 0)
 		return;
 
 	/* We use at most 4, but probe and report up to 8. */
 	c->watch_reg_count = 5;
 	t = read_c0_watchhi4();
-	if ((t & 0x80000000) == 0)
+	if ((t & MIPS_WATCHHI_M) == 0)
 		return;
 
 	c->watch_reg_count = 6;
 	t = read_c0_watchhi5();
-	if ((t & 0x80000000) == 0)
+	if ((t & MIPS_WATCHHI_M) == 0)
 		return;
 
 	c->watch_reg_count = 7;
 	t = read_c0_watchhi6();
-	if ((t & 0x80000000) == 0)
+	if ((t & MIPS_WATCHHI_M) == 0)
 		return;
 
 	c->watch_reg_count = 8;
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index b37954c..396df6e 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -302,12 +302,31 @@
  */
 static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
 {
-	ktime_t expires;
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	ktime_t expires, threshold;
+	uint32_t count, compare;
 	int running;
 
-	/* Is the hrtimer pending? */
+	/* Calculate the biased and scaled guest CP0_Count */
+	count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+	compare = kvm_read_c0_guest_compare(cop0);
+
+	/*
+	 * Find whether CP0_Count has reached the closest timer interrupt. If
+	 * not, we shouldn't inject it.
+	 */
+	if ((int32_t)(count - compare) < 0)
+		return count;
+
+	/*
+	 * The CP0_Count we're going to return has already reached the closest
+	 * timer interrupt. Quickly check if it really is a new interrupt by
+	 * looking at whether the interval until the hrtimer expiry time is
+	 * less than 1/4 of the timer period.
+	 */
 	expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer);
-	if (ktime_compare(now, expires) >= 0) {
+	threshold = ktime_add_ns(now, vcpu->arch.count_period / 4);
+	if (ktime_before(expires, threshold)) {
 		/*
 		 * Cancel it while we handle it so there's no chance of
 		 * interference with the timeout handler.
@@ -329,8 +348,7 @@
 		}
 	}
 
-	/* Return the biased and scaled guest CP0_Count */
-	return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+	return count;
 }
 
 /**
@@ -420,32 +438,6 @@
 }
 
 /**
- * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer.
- * @vcpu:	Virtual CPU.
- *
- * Recalculates and updates the expiry time of the hrtimer. This can be used
- * after timer parameters have been altered which do not depend on the time that
- * the change occurs (in those cases kvm_mips_freeze_hrtimer() and
- * kvm_mips_resume_hrtimer() are used directly).
- *
- * It is guaranteed that no timer interrupts will be lost in the process.
- *
- * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
- */
-static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu)
-{
-	ktime_t now;
-	uint32_t count;
-
-	/*
-	 * freeze_hrtimer takes care of a timer interrupts <= count, and
-	 * resume_hrtimer the hrtimer takes care of a timer interrupts > count.
-	 */
-	now = kvm_mips_freeze_hrtimer(vcpu, &count);
-	kvm_mips_resume_hrtimer(vcpu, now, count);
-}
-
-/**
  * kvm_mips_write_count() - Modify the count and update timer.
  * @vcpu:	Virtual CPU.
  * @count:	Guest CP0_Count value to set.
@@ -540,23 +532,42 @@
  * kvm_mips_write_compare() - Modify compare and update timer.
  * @vcpu:	Virtual CPU.
  * @compare:	New CP0_Compare value.
+ * @ack:	Whether to acknowledge timer interrupt.
  *
  * Update CP0_Compare to a new value and update the timeout.
+ * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure
+ * any pending timer interrupt is preserved.
  */
-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare)
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	int dc;
+	u32 old_compare = kvm_read_c0_guest_compare(cop0);
+	ktime_t now;
+	uint32_t count;
 
 	/* if unchanged, must just be an ack */
-	if (kvm_read_c0_guest_compare(cop0) == compare)
+	if (old_compare == compare) {
+		if (!ack)
+			return;
+		kvm_mips_callbacks->dequeue_timer_int(vcpu);
+		kvm_write_c0_guest_compare(cop0, compare);
 		return;
+	}
 
-	/* Update compare */
+	/* freeze_hrtimer() takes care of timer interrupts <= count */
+	dc = kvm_mips_count_disabled(vcpu);
+	if (!dc)
+		now = kvm_mips_freeze_hrtimer(vcpu, &count);
+
+	if (ack)
+		kvm_mips_callbacks->dequeue_timer_int(vcpu);
+
 	kvm_write_c0_guest_compare(cop0, compare);
 
-	/* Update timeout if count enabled */
-	if (!kvm_mips_count_disabled(vcpu))
-		kvm_mips_update_hrtimer(vcpu);
+	/* resume_hrtimer() takes care of timer interrupts > count */
+	if (!dc)
+		kvm_mips_resume_hrtimer(vcpu, now, count);
 }
 
 /**
@@ -1068,15 +1079,15 @@
 					kvm_read_c0_guest_ebase(cop0));
 			} else if (rd == MIPS_CP0_TLB_HI && sel == 0) {
 				uint32_t nasid =
-					vcpu->arch.gprs[rt] & ASID_MASK;
+					vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID;
 				if ((KSEGX(vcpu->arch.gprs[rt]) != CKSEG0) &&
 				    ((kvm_read_c0_guest_entryhi(cop0) &
-				      ASID_MASK) != nasid)) {
+				      KVM_ENTRYHI_ASID) != nasid)) {
 					kvm_debug("MTCz, change ASID from %#lx to %#lx\n",
 						kvm_read_c0_guest_entryhi(cop0)
-						& ASID_MASK,
+						& KVM_ENTRYHI_ASID,
 						vcpu->arch.gprs[rt]
-						& ASID_MASK);
+						& KVM_ENTRYHI_ASID);
 
 					/* Blow away the shadow host TLBs */
 					kvm_mips_flush_host_tlb(1);
@@ -1095,9 +1106,9 @@
 
 				/* If we are writing to COMPARE */
 				/* Clear pending timer interrupt, if any */
-				kvm_mips_callbacks->dequeue_timer_int(vcpu);
 				kvm_mips_write_compare(vcpu,
-						       vcpu->arch.gprs[rt]);
+						       vcpu->arch.gprs[rt],
+						       true);
 			} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
 				unsigned int old_val, val, change;
 
@@ -1620,7 +1631,7 @@
 		 */
 		index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) |
 						  (kvm_read_c0_guest_entryhi
-						   (cop0) & ASID_MASK));
+						   (cop0) & KVM_ENTRYHI_ASID));
 
 		if (index < 0) {
 			vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK);
@@ -1786,7 +1797,7 @@
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	unsigned long entryhi = (vcpu->arch.  host_cp0_badvaddr & VPN2_MASK) |
-				(kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+			(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
 
 	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
 		/* save old pc */
@@ -1833,7 +1844,7 @@
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	unsigned long entryhi =
 		(vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
-		(kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+		(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
 
 	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
 		/* save old pc */
@@ -1878,7 +1889,7 @@
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
-				(kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+			(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
 
 	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
 		/* save old pc */
@@ -1922,7 +1933,7 @@
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
-		(kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+		(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
 
 	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
 		/* save old pc */
@@ -1967,7 +1978,7 @@
 #ifdef DEBUG
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
-				(kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+			(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
 	int index;
 
 	/* If address not in the guest TLB, then we are in trouble */
@@ -1994,7 +2005,7 @@
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
-				(kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+			(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 
 	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
@@ -2569,7 +2580,8 @@
 	 */
 	index = kvm_mips_guest_tlb_lookup(vcpu,
 		      (va & VPN2_MASK) |
-		      (kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & ASID_MASK));
+		      (kvm_read_c0_guest_entryhi(vcpu->arch.cop0) &
+		       KVM_ENTRYHI_ASID));
 	if (index < 0) {
 		if (exccode == EXCCODE_TLBL) {
 			er = kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu);
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 81687ab..3ef0300 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -32,7 +32,6 @@
     EXPORT(x);
 
 /* Overload, Danger Will Robinson!! */
-#define PT_HOST_ASID        PT_BVADDR
 #define PT_HOST_USERLOCAL   PT_EPC
 
 #define CP0_DDATA_LO        $28,3
@@ -49,45 +48,18 @@
  * a1: vcpu
  */
 	.set	noreorder
-	.set	noat
 
 FEXPORT(__kvm_mips_vcpu_run)
 	/* k0/k1 not being used in host kernel context */
 	INT_ADDIU k1, sp, -PT_SIZE
-	LONG_S	$0, PT_R0(k1)
-	LONG_S	$1, PT_R1(k1)
-	LONG_S	$2, PT_R2(k1)
-	LONG_S	$3, PT_R3(k1)
-
-	LONG_S	$4, PT_R4(k1)
-	LONG_S	$5, PT_R5(k1)
-	LONG_S	$6, PT_R6(k1)
-	LONG_S	$7, PT_R7(k1)
-
-	LONG_S	$8,  PT_R8(k1)
-	LONG_S	$9,  PT_R9(k1)
-	LONG_S	$10, PT_R10(k1)
-	LONG_S	$11, PT_R11(k1)
-	LONG_S	$12, PT_R12(k1)
-	LONG_S	$13, PT_R13(k1)
-	LONG_S	$14, PT_R14(k1)
-	LONG_S	$15, PT_R15(k1)
 	LONG_S	$16, PT_R16(k1)
 	LONG_S	$17, PT_R17(k1)
-
 	LONG_S	$18, PT_R18(k1)
 	LONG_S	$19, PT_R19(k1)
 	LONG_S	$20, PT_R20(k1)
 	LONG_S	$21, PT_R21(k1)
 	LONG_S	$22, PT_R22(k1)
 	LONG_S	$23, PT_R23(k1)
-	LONG_S	$24, PT_R24(k1)
-	LONG_S	$25, PT_R25(k1)
-
-	/*
-	 * XXXKYMA k0/k1 not saved, not being used if we got here through
-	 * an ioctl()
-	 */
 
 	LONG_S	$28, PT_R28(k1)
 	LONG_S	$29, PT_R29(k1)
@@ -104,11 +76,6 @@
 	mfc0	v0, CP0_STATUS
 	LONG_S	v0, PT_STATUS(k1)
 
-	/* Save host ASID, shove it into the BVADDR location */
-	mfc0	v1, CP0_ENTRYHI
-	andi	v1, 0xff
-	LONG_S	v1, PT_HOST_ASID(k1)
-
 	/* Save DDATA_LO, will be used to store pointer to vcpu */
 	mfc0	v1, CP0_DDATA_LO
 	LONG_S	v1, PT_HOST_USERLOCAL(k1)
@@ -170,13 +137,21 @@
 	INT_SLL	t2, t2, 2                   /* x4 */
 	REG_ADDU t3, t1, t2
 	LONG_L	k0, (t3)
-	andi	k0, k0, 0xff
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+	li	t3, CPUINFO_SIZE/4
+	mul	t2, t2, t3		/* x sizeof(struct cpuinfo_mips)/4 */
+	LONG_L	t2, (cpu_data + CPUINFO_ASID_MASK)(t2)
+	and	k0, k0, t2
+#else
+	andi	k0, k0, MIPS_ENTRYHI_ASID
+#endif
 	mtc0	k0, CP0_ENTRYHI
 	ehb
 
 	/* Disable RDHWR access */
 	mtc0	zero, CP0_HWRENA
 
+	.set	noat
 	/* Now load up the Guest Context from VCPU */
 	LONG_L	$1, VCPU_R1(k1)
 	LONG_L	$2, VCPU_R2(k1)
@@ -288,6 +263,8 @@
 	LONG_S	$30, VCPU_R30(k1)
 	LONG_S	$31, VCPU_R31(k1)
 
+	.set at
+
 	/* We need to save hi/lo and restore them on the way out */
 	mfhi	t0
 	LONG_S	t0, VCPU_HI(k1)
@@ -339,9 +316,7 @@
 	/* load up the host EBASE */
 	mfc0	v0, CP0_STATUS
 
-	.set	at
 	or	k0, v0, ST0_BEV
-	.set	noat
 
 	mtc0	k0, CP0_STATUS
 	ehb
@@ -353,7 +328,6 @@
 	 * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
 	 * trigger FPE for pending exceptions.
 	 */
-	.set	at
 	and	v1, v0, ST0_CU1
 	beqz	v1, 1f
 	 nop
@@ -363,7 +337,6 @@
 	sw	t0, VCPU_FCR31(k1)
 	ctc1	zero,fcr31
 	.set	pop
-	.set	noat
 1:
 
 #ifdef CONFIG_CPU_HAS_MSA
@@ -386,10 +359,8 @@
 #endif
 
 	/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
-	.set	at
 	and	v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
 	or	v0, v0, ST0_CU0
-	.set	noat
 	mtc0	v0, CP0_STATUS
 	ehb
 
@@ -456,18 +427,14 @@
 
 	/* Switch EBASE back to the one used by KVM */
 	mfc0	v1, CP0_STATUS
-	.set	at
 	or	k0, v1, ST0_BEV
-	.set	noat
 	mtc0	k0, CP0_STATUS
 	ehb
 	mtc0	t0, CP0_EBASE
 
 	/* Setup status register for running guest in UM */
-	.set	at
 	or	v1, v1, (ST0_EXL | KSU_USER | ST0_IE)
 	and	v1, v1, ~(ST0_CU0 | ST0_MX)
-	.set	noat
 	mtc0	v1, CP0_STATUS
 	ehb
 
@@ -489,13 +456,21 @@
 	INT_SLL	t2, t2, 2		/* x4 */
 	REG_ADDU t3, t1, t2
 	LONG_L	k0, (t3)
-	andi	k0, k0, 0xff
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+	li	t3, CPUINFO_SIZE/4
+	mul	t2, t2, t3		/* x sizeof(struct cpuinfo_mips)/4 */
+	LONG_L	t2, (cpu_data + CPUINFO_ASID_MASK)(t2)
+	and	k0, k0, t2
+#else
+	andi	k0, k0, MIPS_ENTRYHI_ASID
+#endif
 	mtc0	k0, CP0_ENTRYHI
 	ehb
 
 	/* Disable RDHWR access */
 	mtc0	zero, CP0_HWRENA
 
+	.set	noat
 	/* load the guest context from VCPU and return */
 	LONG_L	$0, VCPU_R0(k1)
 	LONG_L	$1, VCPU_R1(k1)
@@ -541,6 +516,7 @@
 	LONG_L	k1, VCPU_R27(k1)
 
 	eret
+	.set	at
 
 __kvm_mips_return_to_host:
 	/* EBASE is already pointing to Linux */
@@ -551,16 +527,6 @@
 	LONG_L	k0, PT_HOST_USERLOCAL(k1)
 	mtc0	k0, CP0_DDATA_LO
 
-	/* Restore host ASID */
-	LONG_L	k0, PT_HOST_ASID(sp)
-	andi	k0, 0xff
-	mtc0	k0,CP0_ENTRYHI
-	ehb
-
-	/* Load context saved on the host stack */
-	LONG_L	$0, PT_R0(k1)
-	LONG_L	$1, PT_R1(k1)
-
 	/*
 	 * r2/v0 is the return code, shift it down by 2 (arithmetic)
 	 * to recover the err code
@@ -568,19 +534,7 @@
 	INT_SRA	k0, v0, 2
 	move	$2, k0
 
-	LONG_L	$3, PT_R3(k1)
-	LONG_L	$4, PT_R4(k1)
-	LONG_L	$5, PT_R5(k1)
-	LONG_L	$6, PT_R6(k1)
-	LONG_L	$7, PT_R7(k1)
-	LONG_L	$8, PT_R8(k1)
-	LONG_L	$9, PT_R9(k1)
-	LONG_L	$10, PT_R10(k1)
-	LONG_L	$11, PT_R11(k1)
-	LONG_L	$12, PT_R12(k1)
-	LONG_L	$13, PT_R13(k1)
-	LONG_L	$14, PT_R14(k1)
-	LONG_L	$15, PT_R15(k1)
+	/* Load context saved on the host stack */
 	LONG_L	$16, PT_R16(k1)
 	LONG_L	$17, PT_R17(k1)
 	LONG_L	$18, PT_R18(k1)
@@ -589,10 +543,6 @@
 	LONG_L	$21, PT_R21(k1)
 	LONG_L	$22, PT_R22(k1)
 	LONG_L	$23, PT_R23(k1)
-	LONG_L	$24, PT_R24(k1)
-	LONG_L	$25, PT_R25(k1)
-
-	/* Host k0/k1 were not saved */
 
 	LONG_L	$28, PT_R28(k1)
 	LONG_L	$29, PT_R29(k1)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 70ef1a4..dc052fb 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -56,6 +56,7 @@
 	{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU },
+	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU },
 	{ "halt_wakeup",  VCPU_STAT(halt_wakeup),	 KVM_STAT_VCPU },
 	{NULL}
 };
@@ -1079,7 +1080,8 @@
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_MIPS_FPU:
-		r = !!cpu_has_fpu;
+		/* We don't handle systems with inconsistent cpu_has_fpu */
+		r = !!raw_cpu_has_fpu;
 		break;
 	case KVM_CAP_MIPS_MSA:
 		/*
@@ -1555,8 +1557,10 @@
 
 		/* Disable MSA & FPU */
 		disable_msa();
-		if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+		if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
 			clear_c0_status(ST0_CU1 | ST0_FR);
+			disable_fpu_hazard();
+		}
 		vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
 	} else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
 		set_c0_status(ST0_CU1);
@@ -1567,6 +1571,7 @@
 
 		/* Disable FPU */
 		clear_c0_status(ST0_CU1 | ST0_FR);
+		disable_fpu_hazard();
 	}
 	preempt_enable();
 }
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index e0e1d0a..ed021ae 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -49,12 +49,18 @@
 
 uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.guest_kernel_asid[smp_processor_id()] & ASID_MASK;
+	int cpu = smp_processor_id();
+
+	return vcpu->arch.guest_kernel_asid[cpu] &
+			cpu_asid_mask(&cpu_data[cpu]);
 }
 
 uint32_t kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.guest_user_asid[smp_processor_id()] & ASID_MASK;
+	int cpu = smp_processor_id();
+
+	return vcpu->arch.guest_user_asid[cpu] &
+			cpu_asid_mask(&cpu_data[cpu]);
 }
 
 inline uint32_t kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu)
@@ -78,7 +84,8 @@
 	old_pagemask = read_c0_pagemask();
 
 	kvm_info("HOST TLBs:\n");
-	kvm_info("ASID: %#lx\n", read_c0_entryhi() & ASID_MASK);
+	kvm_info("ASID: %#lx\n", read_c0_entryhi() &
+		 cpu_asid_mask(&current_cpu_data));
 
 	for (i = 0; i < current_cpu_data.tlbsize; i++) {
 		write_c0_index(i);
@@ -268,6 +275,7 @@
 	int even;
 	struct kvm *kvm = vcpu->kvm;
 	const int flush_dcache_mask = 0;
+	int ret;
 
 	if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
 		kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
@@ -299,14 +307,18 @@
 		pfn1 = kvm->arch.guest_pmap[gfn];
 	}
 
-	entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
 	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
 		   (1 << 2) | (0x1 << 1);
 	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
 		   (1 << 2) | (0x1 << 1);
 
-	return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
-				       flush_dcache_mask);
+	preempt_disable();
+	entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
+	ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
+				      flush_dcache_mask);
+	preempt_enable();
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
 
@@ -361,6 +373,7 @@
 	unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
 	struct kvm *kvm = vcpu->kvm;
 	kvm_pfn_t pfn0, pfn1;
+	int ret;
 
 	if ((tlb->tlb_hi & VPN2_MASK) == 0) {
 		pfn0 = 0;
@@ -387,9 +400,6 @@
 		*hpa1 = pfn1 << PAGE_SHIFT;
 
 	/* Get attributes from the Guest TLB */
-	entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
-					       kvm_mips_get_kernel_asid(vcpu) :
-					       kvm_mips_get_user_asid(vcpu));
 	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
 		   (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V);
 	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
@@ -398,8 +408,15 @@
 	kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
 		  tlb->tlb_lo0, tlb->tlb_lo1);
 
-	return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
-				       tlb->tlb_mask);
+	preempt_disable();
+	entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
+					       kvm_mips_get_kernel_asid(vcpu) :
+					       kvm_mips_get_user_asid(vcpu));
+	ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
+				      tlb->tlb_mask);
+	preempt_enable();
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
 
@@ -564,15 +581,15 @@
 {
 	unsigned long asid = asid_cache(cpu);
 
-	asid += ASID_INC;
-	if (!(asid & ASID_MASK)) {
+	asid += cpu_asid_inc();
+	if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) {
 		if (cpu_has_vtag_icache)
 			flush_icache_all();
 
 		kvm_local_flush_tlb_all();      /* start new asid cycle */
 
 		if (!asid)      /* fix version if needed */
-			asid = ASID_FIRST_VERSION;
+			asid = asid_first_version(cpu);
 	}
 
 	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
@@ -627,6 +644,7 @@
 /* Restore ASID once we are scheduled back after preemption */
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
 	unsigned long flags;
 	int newasid = 0;
 
@@ -637,7 +655,7 @@
 	local_irq_save(flags);
 
 	if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
-							ASID_VERSION_MASK) {
+						asid_version_mask(cpu)) {
 		kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
 		vcpu->arch.guest_kernel_asid[cpu] =
 		    vcpu->arch.guest_kernel_mm.context.asid[cpu];
@@ -672,7 +690,7 @@
 		 */
 		if (current->flags & PF_VCPU) {
 			write_c0_entryhi(vcpu->arch.
-					 preempt_entryhi & ASID_MASK);
+					 preempt_entryhi & asid_mask);
 			ehb();
 		}
 	} else {
@@ -687,11 +705,11 @@
 			if (KVM_GUEST_KERNEL_MODE(vcpu))
 				write_c0_entryhi(vcpu->arch.
 						 guest_kernel_asid[cpu] &
-						 ASID_MASK);
+						 asid_mask);
 			else
 				write_c0_entryhi(vcpu->arch.
 						 guest_user_asid[cpu] &
-						 ASID_MASK);
+						 asid_mask);
 			ehb();
 		}
 	}
@@ -721,7 +739,7 @@
 	kvm_mips_callbacks->vcpu_get_regs(vcpu);
 
 	if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
-	     ASID_VERSION_MASK)) {
+	     asid_version_mask(cpu))) {
 		kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
 			  cpu_context(cpu, current->mm));
 		drop_mmu_context(current->mm, cpu);
@@ -748,7 +766,8 @@
 			inst = *(opc);
 		} else {
 			vpn2 = (unsigned long) opc & VPN2_MASK;
-			asid = kvm_read_c0_guest_entryhi(cop0) & ASID_MASK;
+			asid = kvm_read_c0_guest_entryhi(cop0) &
+						KVM_ENTRYHI_ASID;
 			index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid);
 			if (index < 0) {
 				kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n",
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index c4038d2..6ba0faf 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -505,7 +505,8 @@
 	kvm_write_c0_guest_intctl(cop0, 0xFC000000);
 
 	/* Put in vcpu id as CPUNum into Ebase Reg to handle SMP Guests */
-	kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 | (vcpu_id & 0xFF));
+	kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 |
+				       (vcpu_id & MIPS_EBASE_CPUNUM));
 
 	return 0;
 }
@@ -546,7 +547,7 @@
 		kvm_mips_write_count(vcpu, v);
 		break;
 	case KVM_REG_MIPS_CP0_COMPARE:
-		kvm_mips_write_compare(vcpu, v);
+		kvm_mips_write_compare(vcpu, v, false);
 		break;
 	case KVM_REG_MIPS_CP0_CAUSE:
 		/*
diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig
index e10d333..177769d 100644
--- a/arch/mips/lantiq/Kconfig
+++ b/arch/mips/lantiq/Kconfig
@@ -25,7 +25,17 @@
 endchoice
 
 choice
-	prompt "Devicetree"
+	prompt "Built-in device tree"
+	help
+	  Legacy bootloaders do not pass a DTB pointer to the kernel, so
+	  if a "wrapper" is not being used, the kernel will need to include
+	  a device tree that matches the target board.
+
+	  The builtin DTB will only be used if the firmware does not supply
+	  a valid DTB.
+
+config LANTIQ_DT_NONE
+	bool "None"
 
 config DT_EASY50712
 	bool "Easy50712"
diff --git a/arch/mips/lantiq/Makefile b/arch/mips/lantiq/Makefile
index 690257a..2718652 100644
--- a/arch/mips/lantiq/Makefile
+++ b/arch/mips/lantiq/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+# Copyright (C) 2010 John Crispin <john@phrozen.org>
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License version 2 as published
diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
index a0706fd..149f051 100644
--- a/arch/mips/lantiq/clk.c
+++ b/arch/mips/lantiq/clk.c
@@ -4,7 +4,7 @@
  *  by the Free Software Foundation.
  *
  * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
- * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 #include <linux/io.h>
 #include <linux/export.h>
diff --git a/arch/mips/lantiq/clk.h b/arch/mips/lantiq/clk.h
index 7376ce8..e806e04 100644
--- a/arch/mips/lantiq/clk.h
+++ b/arch/mips/lantiq/clk.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef _LTQ_CLK_H__
diff --git a/arch/mips/lantiq/early_printk.c b/arch/mips/lantiq/early_printk.c
index 9b28d09..44bccae 100644
--- a/arch/mips/lantiq/early_printk.c
+++ b/arch/mips/lantiq/early_printk.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #include <linux/cpu.h>
diff --git a/arch/mips/lantiq/falcon/prom.c b/arch/mips/lantiq/falcon/prom.c
index aa94979..75315c0 100644
--- a/arch/mips/lantiq/falcon/prom.c
+++ b/arch/mips/lantiq/falcon/prom.c
@@ -4,7 +4,7 @@
  * by the Free Software Foundation.
  *
  * Copyright (C) 2012 Thomas Langer <thomas.langer@lantiq.com>
- * Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
 #include <linux/kernel.h>
diff --git a/arch/mips/lantiq/falcon/reset.c b/arch/mips/lantiq/falcon/reset.c
index 5682482..7a535d7 100644
--- a/arch/mips/lantiq/falcon/reset.c
+++ b/arch/mips/lantiq/falcon/reset.c
@@ -4,7 +4,7 @@
  * by the Free Software Foundation.
  *
  * Copyright (C) 2012 Thomas Langer <thomas.langer@lantiq.com>
- * Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
 #include <linux/init.h>
diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c
index 7edcd49..2a1b302 100644
--- a/arch/mips/lantiq/falcon/sysctrl.c
+++ b/arch/mips/lantiq/falcon/sysctrl.c
@@ -4,7 +4,7 @@
  * by the Free Software Foundation.
  *
  * Copyright (C) 2011 Thomas Langer <thomas.langer@lantiq.com>
- * Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2011 John Crispin <john@phrozen.org>
  */
 
 #include <linux/ioport.h>
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 2e7f60c..ff17669e 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 John Crispin <john@phrozen.org>
  * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
  */
 
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
index 297bcaa..5f693ac7 100644
--- a/arch/mips/lantiq/prom.c
+++ b/arch/mips/lantiq/prom.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #include <linux/export.h>
@@ -65,6 +65,8 @@
 
 void __init plat_mem_setup(void)
 {
+	void *dtb;
+
 	ioport_resource.start = IOPORT_RESOURCE_START;
 	ioport_resource.end = IOPORT_RESOURCE_END;
 	iomem_resource.start = IOMEM_RESOURCE_START;
@@ -72,11 +74,18 @@
 
 	set_io_port_base((unsigned long) KSEG1);
 
+	if (fw_arg0 == -2) /* UHI interface */
+		dtb = (void *)fw_arg1;
+	else if (__dtb_start != __dtb_end)
+		dtb = (void *)__dtb_start;
+	else
+		panic("no dtb found");
+
 	/*
-	 * Load the builtin devicetree. This causes the chosen node to be
+	 * Load the devicetree. This causes the chosen node to be
 	 * parsed resulting in our memory appearing
 	 */
-	__dt_setup_arch(__dtb_start);
+	__dt_setup_arch(dtb);
 }
 
 void __init device_tree_init(void)
diff --git a/arch/mips/lantiq/prom.h b/arch/mips/lantiq/prom.h
index bfd2d58..4b6576c 100644
--- a/arch/mips/lantiq/prom.h
+++ b/arch/mips/lantiq/prom.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef _LTQ_PROM_H__
diff --git a/arch/mips/lantiq/xway/clk.c b/arch/mips/lantiq/xway/clk.c
index 07f6d5b..41fc30d 100644
--- a/arch/mips/lantiq/xway/clk.c
+++ b/arch/mips/lantiq/xway/clk.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  *  Copyright (C) 2013-2015 Lantiq Beteiligungs-GmbH & Co.KG
  */
 
diff --git a/arch/mips/lantiq/xway/dcdc.c b/arch/mips/lantiq/xway/dcdc.c
index ae8e930..08f7aba 100644
--- a/arch/mips/lantiq/xway/dcdc.c
+++ b/arch/mips/lantiq/xway/dcdc.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  *  Copyright (C) 2010 Sameer Ahmad, Lantiq GmbH
  */
 
diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
index 34a116e..cef8117 100644
--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -12,7 +12,7 @@
  *   along with this program; if not, write to the Free Software
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  *
- *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ *   Copyright (C) 2011 John Crispin <john@phrozen.org>
  */
 
 #include <linux/init.h>
diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c
index f1492b2..0f1bbea 100644
--- a/arch/mips/lantiq/xway/gptu.c
+++ b/arch/mips/lantiq/xway/gptu.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  *  Copyright (C) 2012 Lantiq GmbH
  */
 
diff --git a/arch/mips/lantiq/xway/prom.c b/arch/mips/lantiq/xway/prom.c
index 8f6e02f..9475b25 100644
--- a/arch/mips/lantiq/xway/prom.c
+++ b/arch/mips/lantiq/xway/prom.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  *  Copyright (C) 2013-2015 Lantiq Beteiligungs-GmbH & Co.KG
  */
 
diff --git a/arch/mips/lantiq/xway/reset.c b/arch/mips/lantiq/xway/reset.c
index bc29bb3..83fd65d 100644
--- a/arch/mips/lantiq/xway/reset.c
+++ b/arch/mips/lantiq/xway/reset.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  *  Copyright (C) 2013-2015 Lantiq Beteiligungs-GmbH & Co.KG
  */
 
@@ -258,7 +258,7 @@
 	return ltq_deassert_device(rcdev, id);
 }
 
-static struct reset_control_ops reset_ops = {
+static const struct reset_control_ops reset_ops = {
 	.reset = ltq_reset_device,
 	.assert = ltq_assert_device,
 	.deassert = ltq_deassert_device,
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index 80554e8..236193b 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2011-2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2011-2012 John Crispin <john@phrozen.org>
  *  Copyright (C) 2013-2015 Lantiq Beteiligungs-GmbH & Co.KG
  */
 
diff --git a/arch/mips/lantiq/xway/vmmc.c b/arch/mips/lantiq/xway/vmmc.c
index d001bc3..4625495 100644
--- a/arch/mips/lantiq/xway/vmmc.c
+++ b/arch/mips/lantiq/xway/vmmc.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
 #include <linux/module.h>
diff --git a/arch/mips/lantiq/xway/xrx200_phy_fw.c b/arch/mips/lantiq/xway/xrx200_phy_fw.c
index 199094a..71e518c 100644
--- a/arch/mips/lantiq/xway/xrx200_phy_fw.c
+++ b/arch/mips/lantiq/xway/xrx200_phy_fw.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
 #include <linux/delay.h>
@@ -112,6 +112,6 @@
 
 module_platform_driver(xway_phy_driver);
 
-MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_AUTHOR("John Crispin <john@phrozen.org>");
 MODULE_DESCRIPTION("Lantiq XRX200 PHY Firmware Loader");
 MODULE_LICENSE("GPL");
diff --git a/arch/mips/lib/dump_tlb.c b/arch/mips/lib/dump_tlb.c
index 92a3731..0f80b93 100644
--- a/arch/mips/lib/dump_tlb.c
+++ b/arch/mips/lib/dump_tlb.c
@@ -19,6 +19,8 @@
 
 	pr_info("Index    : %0x\n", read_c0_index());
 	pr_info("PageMask : %0x\n", read_c0_pagemask());
+	if (cpu_has_guestid)
+		pr_info("GuestCtl1: %0x\n", read_c0_guestctl1());
 	pr_info("EntryHi  : %0*lx\n", field, read_c0_entryhi());
 	pr_info("EntryLo0 : %0*lx\n", field, read_c0_entrylo0());
 	pr_info("EntryLo1 : %0*lx\n", field, read_c0_entrylo1());
@@ -72,7 +74,10 @@
 {
 	unsigned long s_entryhi, entryhi, asid;
 	unsigned long long entrylo0, entrylo1, pa;
-	unsigned int s_index, s_pagemask, pagemask, c0, c1, i;
+	unsigned int s_index, s_pagemask, s_guestctl1 = 0;
+	unsigned int pagemask, guestctl1 = 0, c0, c1, i;
+	unsigned long asidmask = cpu_asid_mask(&current_cpu_data);
+	int asidwidth = DIV_ROUND_UP(ilog2(asidmask) + 1, 4);
 #ifdef CONFIG_32BIT
 	bool xpa = cpu_has_xpa && (read_c0_pagegrain() & PG_ELPA);
 	int pwidth = xpa ? 11 : 8;
@@ -86,7 +91,9 @@
 	s_pagemask = read_c0_pagemask();
 	s_entryhi = read_c0_entryhi();
 	s_index = read_c0_index();
-	asid = s_entryhi & 0xff;
+	asid = s_entryhi & asidmask;
+	if (cpu_has_guestid)
+		s_guestctl1 = read_c0_guestctl1();
 
 	for (i = first; i <= last; i++) {
 		write_c0_index(i);
@@ -97,6 +104,8 @@
 		entryhi	 = read_c0_entryhi();
 		entrylo0 = read_c0_entrylo0();
 		entrylo1 = read_c0_entrylo1();
+		if (cpu_has_guestid)
+			guestctl1 = read_c0_guestctl1();
 
 		/* EHINV bit marks entire entry as invalid */
 		if (cpu_has_tlbinv && entryhi & MIPS_ENTRYHI_EHINV)
@@ -115,7 +124,7 @@
 		 * due to duplicate TLB entry.
 		 */
 		if (!((entrylo0 | entrylo1) & ENTRYLO_G) &&
-		    (entryhi & 0xff) != asid)
+		    (entryhi & asidmask) != asid)
 			continue;
 
 		/*
@@ -126,15 +135,19 @@
 		c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
 		c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
 
-		printk("va=%0*lx asid=%02lx\n",
+		printk("va=%0*lx asid=%0*lx",
 		       vwidth, (entryhi & ~0x1fffUL),
-		       entryhi & 0xff);
+		       asidwidth, entryhi & asidmask);
+		if (cpu_has_guestid)
+			printk(" gid=%02lx",
+			       (guestctl1 & MIPS_GCTL1_RID)
+					>> MIPS_GCTL1_RID_SHIFT);
 		/* RI/XI are in awkward places, so mask them off separately */
 		pa = entrylo0 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
 		if (xpa)
 			pa |= (unsigned long long)readx_c0_entrylo0() << 30;
 		pa = (pa << 6) & PAGE_MASK;
-		printk("\t[");
+		printk("\n\t[");
 		if (cpu_has_rixi)
 			printk("ri=%d xi=%d ",
 			       (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0,
@@ -164,6 +177,8 @@
 	write_c0_entryhi(s_entryhi);
 	write_c0_index(s_index);
 	write_c0_pagemask(s_pagemask);
+	if (cpu_has_guestid)
+		write_c0_guestctl1(s_guestctl1);
 }
 
 void dump_tlb_all(void)
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 8f0019a..18a1ccd 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -228,10 +228,12 @@
 	.hidden __memset
 	.endif
 
+#ifdef CONFIG_CPU_MIPSR6
 .Lbyte_fixup\@:
 	PTR_SUBU	a2, $0, t0
 	jr		ra
 	 PTR_ADDIU	a2, 1
+#endif /* CONFIG_CPU_MIPSR6 */
 
 .Lfirst_fixup\@:
 	jr	ra
diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c
index cfcbb52..744f4a7 100644
--- a/arch/mips/lib/r3k_dump_tlb.c
+++ b/arch/mips/lib/r3k_dump_tlb.c
@@ -29,9 +29,10 @@
 {
 	int	i;
 	unsigned int asid;
-	unsigned long entryhi, entrylo0;
+	unsigned long entryhi, entrylo0, asid_mask;
 
-	asid = read_c0_entryhi() & ASID_MASK;
+	asid_mask = cpu_asid_mask(&current_cpu_data);
+	asid = read_c0_entryhi() & asid_mask;
 
 	for (i = first; i <= last; i++) {
 		write_c0_index(i<<8);
@@ -46,7 +47,7 @@
 		/* Unused entries have a virtual address of KSEG0.  */
 		if ((entryhi & PAGE_MASK) != KSEG0 &&
 		    (entrylo0 & R3K_ENTRYLO_G ||
-		     (entryhi & ASID_MASK) == asid)) {
+		     (entryhi & asid_mask) == asid)) {
 			/*
 			 * Only print entries in use
 			 */
@@ -55,7 +56,7 @@
 			printk("va=%08lx asid=%08lx"
 			       "  [pa=%06lx n=%d d=%d v=%d g=%d]",
 			       entryhi & PAGE_MASK,
-			       entryhi & ASID_MASK,
+			       entryhi & asid_mask,
 			       entrylo0 & PAGE_MASK,
 			       (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0,
 			       (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0,
diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c
index ddf1d4c..f2c714d 100644
--- a/arch/mips/loongson32/common/platform.c
+++ b/arch/mips/loongson32/common/platform.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
+ * Copyright (c) 2011-2016 Zhang, Keguang <keguang.zhang@gmail.com>
  *
  * This program is free software; you can redistribute	it and/or modify it
  * under  the terms of	the GNU General	 Public License as published by the
@@ -10,14 +10,17 @@
 #include <linux/clk.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
+#include <linux/mtd/partitions.h>
+#include <linux/sizes.h>
 #include <linux/phy.h>
 #include <linux/serial_8250.h>
 #include <linux/stmmac.h>
 #include <linux/usb/ehci_pdriver.h>
-#include <asm-generic/sizes.h>
 
-#include <cpufreq.h>
 #include <loongson1.h>
+#include <cpufreq.h>
+#include <dma.h>
+#include <nand.h>
 
 /* 8250/16550 compatible UART */
 #define LS1X_UART(_id)						\
@@ -45,7 +48,7 @@
 	},
 };
 
-void __init ls1x_serial_setup(struct platform_device *pdev)
+void __init ls1x_serial_set_uartclk(struct platform_device *pdev)
 {
 	struct clk *clk;
 	struct plat_serial8250_port *p;
@@ -77,6 +80,42 @@
 	},
 };
 
+/* DMA */
+static struct resource ls1x_dma_resources[] = {
+	[0] = {
+		.start = LS1X_DMAC_BASE,
+		.end = LS1X_DMAC_BASE + SZ_4 - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = LS1X_DMA0_IRQ,
+		.end = LS1X_DMA0_IRQ,
+		.flags = IORESOURCE_IRQ,
+	},
+	[2] = {
+		.start = LS1X_DMA1_IRQ,
+		.end = LS1X_DMA1_IRQ,
+		.flags = IORESOURCE_IRQ,
+	},
+	[3] = {
+		.start = LS1X_DMA2_IRQ,
+		.end = LS1X_DMA2_IRQ,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device ls1x_dma_pdev = {
+	.name		= "ls1x-dma",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(ls1x_dma_resources),
+	.resource	= ls1x_dma_resources,
+};
+
+void __init ls1x_dma_set_platdata(struct plat_ls1x_dma *pdata)
+{
+	ls1x_dma_pdev.dev.platform_data = pdata;
+}
+
 /* Synopsys Ethernet GMAC */
 static struct stmmac_mdio_bus_data ls1x_mdio_bus_data = {
 	.phy_mask	= 0,
@@ -198,6 +237,64 @@
 	},
 };
 
+/* GPIO */
+static struct resource ls1x_gpio0_resources[] = {
+	[0] = {
+		.start	= LS1X_GPIO0_BASE,
+		.end	= LS1X_GPIO0_BASE + SZ_4 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+struct platform_device ls1x_gpio0_pdev = {
+	.name		= "ls1x-gpio",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(ls1x_gpio0_resources),
+	.resource	= ls1x_gpio0_resources,
+};
+
+static struct resource ls1x_gpio1_resources[] = {
+	[0] = {
+		.start	= LS1X_GPIO1_BASE,
+		.end	= LS1X_GPIO1_BASE + SZ_4 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+struct platform_device ls1x_gpio1_pdev = {
+	.name		= "ls1x-gpio",
+	.id		= 1,
+	.num_resources	= ARRAY_SIZE(ls1x_gpio1_resources),
+	.resource	= ls1x_gpio1_resources,
+};
+
+/* NAND Flash */
+static struct resource ls1x_nand_resources[] = {
+	[0] = {
+		.start	= LS1X_NAND_BASE,
+		.end	= LS1X_NAND_BASE + SZ_32 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* DMA channel 0 is dedicated to NAND */
+		.start	= LS1X_DMA_CHANNEL0,
+		.end	= LS1X_DMA_CHANNEL0,
+		.flags	= IORESOURCE_DMA,
+	},
+};
+
+struct platform_device ls1x_nand_pdev = {
+	.name		= "ls1x-nand",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(ls1x_nand_resources),
+	.resource	= ls1x_nand_resources,
+};
+
+void __init ls1x_nand_set_platdata(struct plat_ls1x_nand *pdata)
+{
+	ls1x_nand_pdev.dev.platform_data = pdata;
+}
+
 /* USB EHCI */
 static u64 ls1x_ehci_dmamask = DMA_BIT_MASK(32);
 
diff --git a/arch/mips/loongson32/common/reset.c b/arch/mips/loongson32/common/reset.c
index c41e4ca..8a1d9cc 100644
--- a/arch/mips/loongson32/common/reset.c
+++ b/arch/mips/loongson32/common/reset.c
@@ -9,12 +9,13 @@
 
 #include <linux/io.h>
 #include <linux/pm.h>
+#include <linux/sizes.h>
 #include <asm/idle.h>
 #include <asm/reboot.h>
 
 #include <loongson1.h>
 
-static void __iomem *wdt_base;
+static void __iomem *wdt_reg_base;
 
 static void ls1x_halt(void)
 {
@@ -26,9 +27,9 @@
 
 static void ls1x_restart(char *command)
 {
-	__raw_writel(0x1, wdt_base + WDT_EN);
-	__raw_writel(0x1, wdt_base + WDT_TIMER);
-	__raw_writel(0x1, wdt_base + WDT_SET);
+	__raw_writel(0x1, wdt_reg_base + WDT_EN);
+	__raw_writel(0x1, wdt_reg_base + WDT_TIMER);
+	__raw_writel(0x1, wdt_reg_base + WDT_SET);
 
 	ls1x_halt();
 }
@@ -40,8 +41,8 @@
 
 static int __init ls1x_reboot_setup(void)
 {
-	wdt_base = ioremap_nocache(LS1X_WDT_BASE, 0x0f);
-	if (!wdt_base)
+	wdt_reg_base = ioremap_nocache(LS1X_WDT_BASE, (SZ_4 + SZ_8));
+	if (!wdt_reg_base)
 		panic("Failed to remap watchdog registers");
 
 	_machine_restart = ls1x_restart;
diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c
index 0996b02..ff224f0 100644
--- a/arch/mips/loongson32/common/time.c
+++ b/arch/mips/loongson32/common/time.c
@@ -9,6 +9,7 @@
 
 #include <linux/clk.h>
 #include <linux/interrupt.h>
+#include <linux/sizes.h>
 #include <asm/time.h>
 
 #include <loongson1.h>
@@ -35,25 +36,25 @@
 
 DEFINE_RAW_SPINLOCK(ls1x_timer_lock);
 
-static void __iomem *timer_base;
+static void __iomem *timer_reg_base;
 static uint32_t ls1x_jiffies_per_tick;
 
 static inline void ls1x_pwmtimer_set_period(uint32_t period)
 {
-	__raw_writel(period, timer_base + PWM_HRC);
-	__raw_writel(period, timer_base + PWM_LRC);
+	__raw_writel(period, timer_reg_base + PWM_HRC);
+	__raw_writel(period, timer_reg_base + PWM_LRC);
 }
 
 static inline void ls1x_pwmtimer_restart(void)
 {
-	__raw_writel(0x0, timer_base + PWM_CNT);
-	__raw_writel(INT_EN | CNT_EN, timer_base + PWM_CTRL);
+	__raw_writel(0x0, timer_reg_base + PWM_CNT);
+	__raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL);
 }
 
 void __init ls1x_pwmtimer_init(void)
 {
-	timer_base = ioremap(LS1X_TIMER_BASE, 0xf);
-	if (!timer_base)
+	timer_reg_base = ioremap_nocache(LS1X_TIMER_BASE, SZ_16);
+	if (!timer_reg_base)
 		panic("Failed to remap timer registers");
 
 	ls1x_jiffies_per_tick = DIV_ROUND_CLOSEST(mips_hpt_frequency, HZ);
@@ -86,7 +87,7 @@
 	 */
 	jifs = jiffies;
 	/* read the count */
-	count = __raw_readl(timer_base + PWM_CNT);
+	count = __raw_readl(timer_reg_base + PWM_CNT);
 
 	/*
 	 * It's possible for count to appear to go the wrong way for this
@@ -131,7 +132,7 @@
 	raw_spin_lock(&ls1x_timer_lock);
 	ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick);
 	ls1x_pwmtimer_restart();
-	__raw_writel(INT_EN | CNT_EN, timer_base + PWM_CTRL);
+	__raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL);
 	raw_spin_unlock(&ls1x_timer_lock);
 
 	return 0;
@@ -140,7 +141,7 @@
 static int ls1x_clockevent_tick_resume(struct clock_event_device *cd)
 {
 	raw_spin_lock(&ls1x_timer_lock);
-	__raw_writel(INT_EN | CNT_EN, timer_base + PWM_CTRL);
+	__raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL);
 	raw_spin_unlock(&ls1x_timer_lock);
 
 	return 0;
@@ -149,8 +150,8 @@
 static int ls1x_clockevent_set_state_shutdown(struct clock_event_device *cd)
 {
 	raw_spin_lock(&ls1x_timer_lock);
-	__raw_writel(__raw_readl(timer_base + PWM_CTRL) & ~CNT_EN,
-		     timer_base + PWM_CTRL);
+	__raw_writel(__raw_readl(timer_reg_base + PWM_CTRL) & ~CNT_EN,
+		     timer_reg_base + PWM_CTRL);
 	raw_spin_unlock(&ls1x_timer_lock);
 
 	return 0;
@@ -220,7 +221,7 @@
 
 #ifdef CONFIG_CEVT_CSRC_LS1X
 	/* setup LS1X PWM timer */
-	clk = clk_get(NULL, "ls1x_pwmtimer");
+	clk = clk_get(NULL, "ls1x-pwmtimer");
 	if (IS_ERR(clk))
 		panic("unable to get timer clock, err=%ld", PTR_ERR(clk));
 
diff --git a/arch/mips/loongson32/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c
index 58daeea..38a1d40 100644
--- a/arch/mips/loongson32/ls1b/board.c
+++ b/arch/mips/loongson32/ls1b/board.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
+ * Copyright (c) 2011-2016 Zhang, Keguang <keguang.zhang@gmail.com>
  *
  * This program is free software; you can redistribute	it and/or modify it
  * under  the terms of	the GNU General	 Public License as published by the
@@ -7,26 +7,83 @@
  * option) any later version.
  */
 
+#include <linux/leds.h>
+#include <linux/mtd/partitions.h>
+#include <linux/sizes.h>
+
+#include <loongson1.h>
+#include <dma.h>
+#include <nand.h>
 #include <platform.h>
 
+struct plat_ls1x_dma ls1x_dma_pdata = {
+	.nr_channels	= 3,
+};
+
+static struct mtd_partition ls1x_nand_parts[] = {
+	{
+		.name        = "kernel",
+		.offset      = 0,
+		.size        = SZ_16M,
+	},
+	{
+		.name        = "rootfs",
+		.offset      = MTDPART_OFS_APPEND,
+		.size        = MTDPART_SIZ_FULL,
+	},
+};
+
+struct plat_ls1x_nand ls1x_nand_pdata = {
+	.parts		= ls1x_nand_parts,
+	.nr_parts	= ARRAY_SIZE(ls1x_nand_parts),
+	.hold_cycle	= 0x2,
+	.wait_cycle	= 0xc,
+};
+
+static const struct gpio_led ls1x_gpio_leds[] __initconst = {
+	{
+		.name			= "LED9",
+		.default_trigger	= "heartbeat",
+		.gpio			= 38,
+		.active_low		= 1,
+		.default_state		= LEDS_GPIO_DEFSTATE_OFF,
+	}, {
+		.name			= "LED6",
+		.default_trigger	= "nand-disk",
+		.gpio			= 39,
+		.active_low		= 1,
+		.default_state		= LEDS_GPIO_DEFSTATE_OFF,
+	},
+};
+
+static const struct gpio_led_platform_data ls1x_led_pdata __initconst = {
+	.num_leds	= ARRAY_SIZE(ls1x_gpio_leds),
+	.leds		= ls1x_gpio_leds,
+};
+
 static struct platform_device *ls1b_platform_devices[] __initdata = {
 	&ls1x_uart_pdev,
 	&ls1x_cpufreq_pdev,
+	&ls1x_dma_pdev,
 	&ls1x_eth0_pdev,
 	&ls1x_eth1_pdev,
 	&ls1x_ehci_pdev,
+	&ls1x_gpio0_pdev,
+	&ls1x_gpio1_pdev,
+	&ls1x_nand_pdev,
 	&ls1x_rtc_pdev,
 };
 
 static int __init ls1b_platform_init(void)
 {
-	int err;
+	ls1x_serial_set_uartclk(&ls1x_uart_pdev);
+	ls1x_dma_set_platdata(&ls1x_dma_pdata);
+	ls1x_nand_set_platdata(&ls1x_nand_pdata);
 
-	ls1x_serial_setup(&ls1x_uart_pdev);
+	gpio_led_register_device(-1, &ls1x_led_pdata);
 
-	err = platform_add_devices(ls1b_platform_devices,
+	return platform_add_devices(ls1b_platform_devices,
 				   ARRAY_SIZE(ls1b_platform_devices));
-	return err;
 }
 
 arch_initcall(ls1b_platform_init);
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 85d8089..0fce460 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -31,7 +31,7 @@
 # can't easily be used safely within the kbuild framework.
 #
 ifeq ($(call cc-ifversion, -ge, 0409, y), y)
-  ifeq ($(call ld-ifversion, -ge, 22500000, y), y)
+  ifeq ($(call ld-ifversion, -ge, 225000000, y), y)
     cflags-$(CONFIG_CPU_LOONGSON3)  += \
       $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
   else
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index d6d07ad..57d590a 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -105,6 +105,10 @@
 		loongson_chiptemp[1] = 0x900010001fe0019c;
 		loongson_chiptemp[2] = 0x900020001fe0019c;
 		loongson_chiptemp[3] = 0x900030001fe0019c;
+		loongson_freqctrl[0] = 0x900000001fe001d0;
+		loongson_freqctrl[1] = 0x900010001fe001d0;
+		loongson_freqctrl[2] = 0x900020001fe001d0;
+		loongson_freqctrl[3] = 0x900030001fe001d0;
 		loongson_sysconf.ht_control_base = 0x90000EFDFB000000;
 		loongson_sysconf.workarounds = WORKAROUND_CPUFREQ;
 	} else if (ecpu->cputype == Loongson_3B) {
@@ -187,7 +191,8 @@
 		case PRID_REV_LOONGSON2F:
 			cpu_clock_freq = 797000000;
 			break;
-		case PRID_REV_LOONGSON3A:
+		case PRID_REV_LOONGSON3A_R1:
+		case PRID_REV_LOONGSON3A_R2:
 			cpu_clock_freq = 900000000;
 			break;
 		case PRID_REV_LOONGSON3B_R1:
diff --git a/arch/mips/loongson64/loongson-3/Makefile b/arch/mips/loongson64/loongson-3/Makefile
index 622fead..44bc148 100644
--- a/arch/mips/loongson64/loongson-3/Makefile
+++ b/arch/mips/loongson64/loongson-3/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for Loongson-3 family machines
 #
-obj-y			+= irq.o cop2-ex.o platform.o
+obj-y			+= irq.o cop2-ex.o platform.o acpi_init.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 
diff --git a/drivers/platform/mips/acpi_init.c b/arch/mips/loongson64/loongson-3/acpi_init.c
similarity index 100%
rename from drivers/platform/mips/acpi_init.c
rename to arch/mips/loongson64/loongson-3/acpi_init.c
diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c
index 0f75b6b..8e76490 100644
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ b/arch/mips/loongson64/loongson-3/irq.c
@@ -24,19 +24,21 @@
 	}
 }
 
+#define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0)
+
 void mach_irq_dispatch(unsigned int pending)
 {
 	if (pending & CAUSEF_IP7)
 		do_IRQ(LOONGSON_TIMER_IRQ);
 #if defined(CONFIG_SMP)
-	else if (pending & CAUSEF_IP6)
+	if (pending & CAUSEF_IP6)
 		loongson3_ipi_interrupt(NULL);
 #endif
-	else if (pending & CAUSEF_IP3)
+	if (pending & CAUSEF_IP3)
 		ht_irqdispatch();
-	else if (pending & CAUSEF_IP2)
+	if (pending & CAUSEF_IP2)
 		do_IRQ(LOONGSON_UART_IRQ);
-	else {
+	if (pending & UNUSED_IPS) {
 		pr_err("%s : spurious interrupt\n", __func__);
 		spurious_interrupt();
 	}
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index 6f9e010..282c5a8 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -213,10 +213,10 @@
 		BOOTMEM_DEFAULT);
 
 	if (node == 0 && node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) {
-		/* Reserve 0xff800000~0xffffffff for RS780E integrated GPU */
+		/* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */
 		reserve_bootmem_node(NODE_DATA(node),
-				(node_addrspace_offset | 0xff800000),
-				8 << 20, BOOTMEM_DEFAULT);
+				(node_addrspace_offset | 0xfe000000),
+				32 << 20, BOOTMEM_DEFAULT);
 	}
 
 	sparse_memory_present_with_active_regions(node);
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 509832a9..e59759a 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -421,7 +421,6 @@
 	local_irq_save(flags);
 	fixup_irqs();
 	local_irq_restore(flags);
-	flush_cache_all();
 	local_flush_tlb_all();
 
 	return 0;
@@ -440,7 +439,7 @@
  * flush all L1 entries at first. Then, another core (usually Core 0) can
  * safely disable the clock of the target core. loongson3_play_dead() is
  * called via CKSEG1 (uncached and unmmaped) */
-static void loongson3a_play_dead(int *state_addr)
+static void loongson3a_r1_play_dead(int *state_addr)
 {
 	register int val;
 	register long cpuid, core, node, count;
@@ -502,6 +501,89 @@
 		: "a1");
 }
 
+static void loongson3a_r2_play_dead(int *state_addr)
+{
+	register int val;
+	register long cpuid, core, node, count;
+	register void *addr, *base, *initfunc;
+
+	__asm__ __volatile__(
+		"   .set push                     \n"
+		"   .set noreorder                \n"
+		"   li %[addr], 0x80000000        \n" /* KSEG0 */
+		"1: cache 0, 0(%[addr])           \n" /* flush L1 ICache */
+		"   cache 0, 1(%[addr])           \n"
+		"   cache 0, 2(%[addr])           \n"
+		"   cache 0, 3(%[addr])           \n"
+		"   cache 1, 0(%[addr])           \n" /* flush L1 DCache */
+		"   cache 1, 1(%[addr])           \n"
+		"   cache 1, 2(%[addr])           \n"
+		"   cache 1, 3(%[addr])           \n"
+		"   addiu %[sets], %[sets], -1    \n"
+		"   bnez  %[sets], 1b             \n"
+		"   addiu %[addr], %[addr], 0x40  \n"
+		"   li %[addr], 0x80000000        \n" /* KSEG0 */
+		"2: cache 2, 0(%[addr])           \n" /* flush L1 VCache */
+		"   cache 2, 1(%[addr])           \n"
+		"   cache 2, 2(%[addr])           \n"
+		"   cache 2, 3(%[addr])           \n"
+		"   cache 2, 4(%[addr])           \n"
+		"   cache 2, 5(%[addr])           \n"
+		"   cache 2, 6(%[addr])           \n"
+		"   cache 2, 7(%[addr])           \n"
+		"   cache 2, 8(%[addr])           \n"
+		"   cache 2, 9(%[addr])           \n"
+		"   cache 2, 10(%[addr])          \n"
+		"   cache 2, 11(%[addr])          \n"
+		"   cache 2, 12(%[addr])          \n"
+		"   cache 2, 13(%[addr])          \n"
+		"   cache 2, 14(%[addr])          \n"
+		"   cache 2, 15(%[addr])          \n"
+		"   addiu %[vsets], %[vsets], -1  \n"
+		"   bnez  %[vsets], 2b            \n"
+		"   addiu %[addr], %[addr], 0x40  \n"
+		"   li    %[val], 0x7             \n" /* *state_addr = CPU_DEAD; */
+		"   sw    %[val], (%[state_addr]) \n"
+		"   sync                          \n"
+		"   cache 21, (%[state_addr])     \n" /* flush entry of *state_addr */
+		"   .set pop                      \n"
+		: [addr] "=&r" (addr), [val] "=&r" (val)
+		: [state_addr] "r" (state_addr),
+		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
+		  [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
+
+	__asm__ __volatile__(
+		"   .set push                         \n"
+		"   .set noreorder                    \n"
+		"   .set mips64                       \n"
+		"   mfc0  %[cpuid], $15, 1            \n"
+		"   andi  %[cpuid], 0x3ff             \n"
+		"   dli   %[base], 0x900000003ff01000 \n"
+		"   andi  %[core], %[cpuid], 0x3      \n"
+		"   sll   %[core], 8                  \n" /* get core id */
+		"   or    %[base], %[base], %[core]   \n"
+		"   andi  %[node], %[cpuid], 0xc      \n"
+		"   dsll  %[node], 42                 \n" /* get node id */
+		"   or    %[base], %[base], %[node]   \n"
+		"1: li    %[count], 0x100             \n" /* wait for init loop */
+		"2: bnez  %[count], 2b                \n" /* limit mailbox access */
+		"   addiu %[count], -1                \n"
+		"   ld    %[initfunc], 0x20(%[base])  \n" /* get PC via mailbox */
+		"   beqz  %[initfunc], 1b             \n"
+		"   nop                               \n"
+		"   ld    $sp, 0x28(%[base])          \n" /* get SP via mailbox */
+		"   ld    $gp, 0x30(%[base])          \n" /* get GP via mailbox */
+		"   ld    $a1, 0x38(%[base])          \n"
+		"   jr    %[initfunc]                 \n" /* jump to initial PC */
+		"   nop                               \n"
+		"   .set pop                          \n"
+		: [core] "=&r" (core), [node] "=&r" (node),
+		  [base] "=&r" (base), [cpuid] "=&r" (cpuid),
+		  [count] "=&r" (count), [initfunc] "=&r" (initfunc)
+		: /* No Input */
+		: "a1");
+}
+
 static void loongson3b_play_dead(int *state_addr)
 {
 	register int val;
@@ -573,13 +655,18 @@
 	void (*play_dead_at_ckseg1)(int *);
 
 	idle_task_exit();
-	switch (loongson_sysconf.cputype) {
-	case Loongson_3A:
+	switch (read_c0_prid() & PRID_REV_MASK) {
+	case PRID_REV_LOONGSON3A_R1:
 	default:
 		play_dead_at_ckseg1 =
-			(void *)CKSEG1ADDR((unsigned long)loongson3a_play_dead);
+			(void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead);
 		break;
-	case Loongson_3B:
+	case PRID_REV_LOONGSON3A_R2:
+		play_dead_at_ckseg1 =
+			(void *)CKSEG1ADDR((unsigned long)loongson3a_r2_play_dead);
+		break;
+	case PRID_REV_LOONGSON3B_R1:
+	case PRID_REV_LOONGSON3B_R2:
 		play_dead_at_ckseg1 =
 			(void *)CKSEG1ADDR((unsigned long)loongson3b_play_dead);
 		break;
@@ -594,9 +681,9 @@
 	uint64_t core_id = cpu_data[cpu].core;
 	uint64_t package_id = cpu_data[cpu].package;
 
-	if (loongson_sysconf.cputype == Loongson_3A) {
+	if ((read_c0_prid() & PRID_REV_MASK) == PRID_REV_LOONGSON3A_R1) {
 		LOONGSON_CHIPCFG(package_id) &= ~(1 << (12 + core_id));
-	} else if (loongson_sysconf.cputype == Loongson_3B) {
+	} else {
 		if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG))
 			LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3));
 	}
@@ -607,9 +694,9 @@
 	uint64_t core_id = cpu_data[cpu].core;
 	uint64_t package_id = cpu_data[cpu].package;
 
-	if (loongson_sysconf.cputype == Loongson_3A) {
+	if ((read_c0_prid() & PRID_REV_MASK) == PRID_REV_LOONGSON3A_R1) {
 		LOONGSON_CHIPCFG(package_id) |= 1 << (12 + core_id);
-	} else if (loongson_sysconf.cputype == Loongson_3B) {
+	} else {
 		if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG))
 			LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3);
 	}
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile
index a19641d..e9bbc2a 100644
--- a/arch/mips/math-emu/Makefile
+++ b/arch/mips/math-emu/Makefile
@@ -4,9 +4,9 @@
 
 obj-y	+= cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \
 	   dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \
-	   dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \
+	   dp_tint.o dp_fint.o dp_maddf.o dp_2008class.o dp_fmin.o dp_fmax.o \
 	   sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \
-	   sp_tint.o sp_fint.o sp_maddf.o sp_msubf.o sp_2008class.o sp_fmin.o sp_fmax.o \
+	   sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \
 	   dsemul.o
 
 lib-y	+= ieee754d.o \
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index cdfd44f..d96e912b 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -445,9 +445,11 @@
 	case spec_op:
 		switch (insn.r_format.func) {
 		case jalr_op:
-			regs->regs[insn.r_format.rd] =
-				regs->cp0_epc + dec_insn.pc_inc +
-				dec_insn.next_pc_inc;
+			if (insn.r_format.rd != 0) {
+				regs->regs[insn.r_format.rd] =
+					regs->cp0_epc + dec_insn.pc_inc +
+					dec_insn.next_pc_inc;
+			}
 			/* Fall through */
 		case jr_op:
 			/* For R6, JR already emulated in jalr_op */
@@ -973,9 +975,10 @@
 		struct mm_decoded_insn dec_insn, void *__user *fault_addr)
 {
 	unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc;
-	unsigned int cond, cbit;
+	unsigned int cond, cbit, bit0;
 	mips_instruction ir;
 	int likely, pc_inc;
+	union fpureg *fpr;
 	u32 __user *wva;
 	u64 __user *dva;
 	u32 wval;
@@ -1187,14 +1190,14 @@
 				return SIGILL;
 
 			cond = likely = 0;
+			fpr = &current->thread.fpu.fpr[MIPSInst_RT(ir)];
+			bit0 = get_fpr32(fpr, 0) & 0x1;
 			switch (MIPSInst_RS(ir)) {
 			case bc1eqz_op:
-				if (get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1)
-				    cond = 1;
+				cond = bit0 == 0;
 				break;
 			case bc1nez_op:
-				if (!(get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1))
-				    cond = 1;
+				cond = bit0 != 0;
 				break;
 			}
 			goto branch_common;
@@ -1674,7 +1677,7 @@
 			union ieee754sp(*b) (union ieee754sp, union ieee754sp);
 			union ieee754sp(*u) (union ieee754sp);
 		} handler;
-		union ieee754sp fs, ft;
+		union ieee754sp fd, fs, ft;
 
 		switch (MIPSInst_FUNC(ir)) {
 			/* binary ops */
@@ -1945,6 +1948,17 @@
 			rfmt = w_fmt;
 			goto copcsr;
 
+		case fsel_op:
+			if (!cpu_has_mips_r6)
+				return SIGILL;
+
+			SPFROMREG(fd, MIPSInst_FD(ir));
+			if (fd.bits & 0x1)
+				SPFROMREG(rv.s, MIPSInst_FT(ir));
+			else
+				SPFROMREG(rv.s, MIPSInst_FS(ir));
+			break;
+
 		case fcvtl_op:
 			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
@@ -1993,7 +2007,7 @@
 	}
 
 	case d_fmt: {
-		union ieee754dp fs, ft;
+		union ieee754dp fd, fs, ft;
 		union {
 			union ieee754dp(*b) (union ieee754dp, union ieee754dp);
 			union ieee754dp(*u) (union ieee754dp);
@@ -2243,6 +2257,17 @@
 			rfmt = w_fmt;
 			goto copcsr;
 
+		case fsel_op:
+			if (!cpu_has_mips_r6)
+				return SIGILL;
+
+			DPFROMREG(fd, MIPSInst_FD(ir));
+			if (fd.bits & 0x1)
+				DPFROMREG(rv.d, MIPSInst_FT(ir));
+			else
+				DPFROMREG(rv.d, MIPSInst_FS(ir));
+			break;
+
 		case fcvtl_op:
 			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 119eda9..4a2d03c 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -14,8 +14,12 @@
 
 #include "ieee754dp.h"
 
-union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
-				union ieee754dp y)
+enum maddf_flags {
+	maddf_negate_product	= 1 << 0,
+};
+
+static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
+				 union ieee754dp y, enum maddf_flags flags)
 {
 	int re;
 	int rs;
@@ -32,16 +36,15 @@
 
 	COMPXDP;
 	COMPYDP;
-
-	u64 zm; int ze; int zs __maybe_unused; int zc;
+	COMPZDP;
 
 	EXPLODEXDP;
 	EXPLODEYDP;
-	EXPLODEDP(z, zc, zs, ze, zm)
+	EXPLODEZDP;
 
 	FLUSHXDP;
 	FLUSHYDP;
-	FLUSHDP(z, zc, zs, ze, zm);
+	FLUSHZDP;
 
 	ieee754_clearcx();
 
@@ -50,7 +53,7 @@
 		ieee754_setcx(IEEE754_INVALID_OPERATION);
 		return ieee754dp_nanxcpt(z);
 	case IEEE754_CLASS_DNORM:
-		DPDNORMx(zm, ze);
+		DPDNORMZ;
 	/* QNAN is handled separately below */
 	}
 
@@ -154,13 +157,15 @@
 
 	re = xe + ye;
 	rs = xs ^ ys;
+	if (flags & maddf_negate_product)
+		rs ^= 1;
 
 	/* shunt to top of word */
 	xm <<= 64 - (DP_FBITS + 1);
 	ym <<= 64 - (DP_FBITS + 1);
 
 	/*
-	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
+	 * Multiply 64 bits xm, ym to give high 64 bits rm with stickiness.
 	 */
 
 	/* 32 * 32 => 64 */
@@ -198,7 +203,7 @@
 	if ((s64) rm < 0) {
 		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
 		     ((rm << (DP_FBITS + 1 + 3)) != 0);
-			re++;
+		re++;
 	} else {
 		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
 		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
@@ -263,3 +268,15 @@
 
 	return ieee754dp_format(zs, ze, zm);
 }
+
+union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
+				union ieee754dp y)
+{
+	return _dp_maddf(z, x, y, 0);
+}
+
+union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
+				union ieee754dp y)
+{
+	return _dp_maddf(z, x, y, maddf_negate_product);
+}
diff --git a/arch/mips/math-emu/dp_msubf.c b/arch/mips/math-emu/dp_msubf.c
deleted file mode 100644
index 1224126..0000000
--- a/arch/mips/math-emu/dp_msubf.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * IEEE754 floating point arithmetic
- * double precision: MSUB.f (Fused Multiply Subtract)
- * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
- *
- * MIPS floating point support
- * Copyright (C) 2015 Imagination Technologies, Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; version 2 of the License.
- */
-
-#include "ieee754dp.h"
-
-union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
-				union ieee754dp y)
-{
-	int re;
-	int rs;
-	u64 rm;
-	unsigned lxm;
-	unsigned hxm;
-	unsigned lym;
-	unsigned hym;
-	u64 lrm;
-	u64 hrm;
-	u64 t;
-	u64 at;
-	int s;
-
-	COMPXDP;
-	COMPYDP;
-
-	u64 zm; int ze; int zs __maybe_unused; int zc;
-
-	EXPLODEXDP;
-	EXPLODEYDP;
-	EXPLODEDP(z, zc, zs, ze, zm)
-
-	FLUSHXDP;
-	FLUSHYDP;
-	FLUSHDP(z, zc, zs, ze, zm);
-
-	ieee754_clearcx();
-
-	switch (zc) {
-	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(z);
-	case IEEE754_CLASS_DNORM:
-		DPDNORMx(zm, ze);
-	/* QNAN is handled separately below */
-	}
-
-	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
-		return ieee754dp_nanxcpt(y);
-
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		return ieee754dp_nanxcpt(x);
-
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
-		return y;
-
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
-		return x;
-
-
-	/*
-	 * Infinity handling
-	 */
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_indef();
-
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		return ieee754dp_inf(xs ^ ys);
-
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
-		if (zc == IEEE754_CLASS_INF)
-			return ieee754dp_inf(zs);
-		/* Multiplication is 0 so just return z */
-		return z;
-
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
-		DPDNORMX;
-
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
-			return ieee754dp_inf(zs);
-		DPDNORMY;
-		break;
-
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
-			return ieee754dp_inf(zs);
-		DPDNORMX;
-		break;
-
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
-			return ieee754dp_inf(zs);
-		/* fall through to real computations */
-	}
-
-	/* Finally get to do some computation */
-
-	/*
-	 * Do the multiplication bit first
-	 *
-	 * rm = xm * ym, re = xe + ye basically
-	 *
-	 * At this point xm and ym should have been normalized.
-	 */
-	assert(xm & DP_HIDDEN_BIT);
-	assert(ym & DP_HIDDEN_BIT);
-
-	re = xe + ye;
-	rs = xs ^ ys;
-
-	/* shunt to top of word */
-	xm <<= 64 - (DP_FBITS + 1);
-	ym <<= 64 - (DP_FBITS + 1);
-
-	/*
-	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
-	 */
-
-	/* 32 * 32 => 64 */
-#define DPXMULT(x, y)	((u64)(x) * (u64)y)
-
-	lxm = xm;
-	hxm = xm >> 32;
-	lym = ym;
-	hym = ym >> 32;
-
-	lrm = DPXMULT(lxm, lym);
-	hrm = DPXMULT(hxm, hym);
-
-	t = DPXMULT(lxm, hym);
-
-	at = lrm + (t << 32);
-	hrm += at < lrm;
-	lrm = at;
-
-	hrm = hrm + (t >> 32);
-
-	t = DPXMULT(hxm, lym);
-
-	at = lrm + (t << 32);
-	hrm += at < lrm;
-	lrm = at;
-
-	hrm = hrm + (t >> 32);
-
-	rm = hrm | (lrm != 0);
-
-	/*
-	 * Sticky shift down to normal rounding precision.
-	 */
-	if ((s64) rm < 0) {
-		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
-		     ((rm << (DP_FBITS + 1 + 3)) != 0);
-			re++;
-	} else {
-		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
-		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
-	}
-	assert(rm & (DP_HIDDEN_BIT << 3));
-
-	/* And now the subtraction */
-
-	/* flip sign of r and handle as add */
-	rs ^= 1;
-
-	assert(zm & DP_HIDDEN_BIT);
-
-	/*
-	 * Provide guard,round and stick bit space.
-	 */
-	zm <<= 3;
-
-	if (ze > re) {
-		/*
-		 * Have to shift y fraction right to align.
-		 */
-		s = ze - re;
-		rm = XDPSRS(rm, s);
-		re += s;
-	} else if (re > ze) {
-		/*
-		 * Have to shift x fraction right to align.
-		 */
-		s = re - ze;
-		zm = XDPSRS(zm, s);
-		ze += s;
-	}
-	assert(ze == re);
-	assert(ze <= DP_EMAX);
-
-	if (zs == rs) {
-		/*
-		 * Generate 28 bit result of adding two 27 bit numbers
-		 * leaving result in xm, xs and xe.
-		 */
-		zm = zm + rm;
-
-		if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
-			zm = XDPSRS1(zm);
-			ze++;
-		}
-	} else {
-		if (zm >= rm) {
-			zm = zm - rm;
-		} else {
-			zm = rm - zm;
-			zs = rs;
-		}
-		if (zm == 0)
-			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
-
-		/*
-		 * Normalize to rounding precision.
-		 */
-		while ((zm >> (DP_FBITS + 3)) == 0) {
-			zm <<= 1;
-			ze--;
-		}
-	}
-
-	return ieee754dp_format(zs, ze, zm);
-}
diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c
index d0901f0..87d0b44 100644
--- a/arch/mips/math-emu/dp_mul.c
+++ b/arch/mips/math-emu/dp_mul.c
@@ -125,7 +125,7 @@
 	ym <<= 64 - (DP_FBITS + 1);
 
 	/*
-	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
+	 * Multiply 64 bits xm, ym to give high 64 bits rm with stickiness.
 	 */
 
 	/* 32 * 32 => 64 */
@@ -163,7 +163,7 @@
 	if ((s64) rm < 0) {
 		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
 		     ((rm << (DP_FBITS + 1 + 3)) != 0);
-			re++;
+		re++;
 	} else {
 		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
 		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 46b964d..d4ceacd 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -60,7 +60,7 @@
 			unsigned int rs;
 			s32 v;
 
-			rs = (((insn.mm_a_format.rs + 0x1e) & 0xf) + 2);
+			rs = (((insn.mm_a_format.rs + 0xe) & 0xf) + 2);
 			v = regs->cp0_epc & ~3;
 			v += insn.mm_a_format.simmediate << 2;
 			regs->regs[rs] = (long)v;
diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c
index 47d26c8..465a034 100644
--- a/arch/mips/math-emu/ieee754dp.c
+++ b/arch/mips/math-emu/ieee754dp.c
@@ -54,10 +54,13 @@
 	assert(ieee754dp_issnan(r));
 
 	ieee754_setcx(IEEE754_INVALID_OPERATION);
-	if (ieee754_csr.nan2008)
+	if (ieee754_csr.nan2008) {
 		DPMANT(r) |= DP_MBIT(DP_FBITS - 1);
-	else
-		r = ieee754dp_indef();
+	} else {
+		DPMANT(r) &= ~DP_MBIT(DP_FBITS - 1);
+		if (!ieee754dp_isnan(r))
+			DPMANT(r) |= DP_MBIT(DP_FBITS - 2);
+	}
 
 	return r;
 }
diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h
index e2babd9..9ba0230 100644
--- a/arch/mips/math-emu/ieee754dp.h
+++ b/arch/mips/math-emu/ieee754dp.h
@@ -60,6 +60,7 @@
 	while ((m >> DP_FBITS) == 0) { m <<= 1; e--; }
 #define DPDNORMX	DPDNORMx(xm, xe)
 #define DPDNORMY	DPDNORMx(ym, ye)
+#define DPDNORMZ	DPDNORMx(zm, ze)
 
 static inline union ieee754dp builddp(int s, int bx, u64 m)
 {
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index ed7bb27..8bc2f69 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -55,6 +55,9 @@
 #define COMPYSP \
 	unsigned ym; int ye; int ys; int yc
 
+#define COMPZSP \
+	unsigned zm; int ze; int zs; int zc
+
 #define EXPLODESP(v, vc, vs, ve, vm)					\
 {									\
 	vs = SPSIGN(v);							\
@@ -81,6 +84,7 @@
 }
 #define EXPLODEXSP EXPLODESP(x, xc, xs, xe, xm)
 #define EXPLODEYSP EXPLODESP(y, yc, ys, ye, ym)
+#define EXPLODEZSP EXPLODESP(z, zc, zs, ze, zm)
 
 
 #define COMPXDP \
@@ -89,6 +93,9 @@
 #define COMPYDP \
 	u64 ym; int ye; int ys; int yc
 
+#define COMPZDP \
+	u64 zm; int ze; int zs; int zc
+
 #define EXPLODEDP(v, vc, vs, ve, vm)					\
 {									\
 	vm = DPMANT(v);							\
@@ -115,6 +122,7 @@
 }
 #define EXPLODEXDP EXPLODEDP(x, xc, xs, xe, xm)
 #define EXPLODEYDP EXPLODEDP(y, yc, ys, ye, ym)
+#define EXPLODEZDP EXPLODEDP(z, zc, zs, ze, zm)
 
 #define FLUSHDP(v, vc, vs, ve, vm)					\
 	if (vc==IEEE754_CLASS_DNORM) {					\
@@ -140,7 +148,9 @@
 
 #define FLUSHXDP FLUSHDP(x, xc, xs, xe, xm)
 #define FLUSHYDP FLUSHDP(y, yc, ys, ye, ym)
+#define FLUSHZDP FLUSHDP(z, zc, zs, ze, zm)
 #define FLUSHXSP FLUSHSP(x, xc, xs, xe, xm)
 #define FLUSHYSP FLUSHSP(y, yc, ys, ye, ym)
+#define FLUSHZSP FLUSHSP(z, zc, zs, ze, zm)
 
 #endif /* __IEEE754INT_H  */
diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c
index e0b2c45..260e6896 100644
--- a/arch/mips/math-emu/ieee754sp.c
+++ b/arch/mips/math-emu/ieee754sp.c
@@ -54,10 +54,13 @@
 	assert(ieee754sp_issnan(r));
 
 	ieee754_setcx(IEEE754_INVALID_OPERATION);
-	if (ieee754_csr.nan2008)
+	if (ieee754_csr.nan2008) {
 		SPMANT(r) |= SP_MBIT(SP_FBITS - 1);
-	else
-		r = ieee754sp_indef();
+	} else {
+		SPMANT(r) &= ~SP_MBIT(SP_FBITS - 1);
+		if (!ieee754sp_isnan(r))
+			SPMANT(r) |= SP_MBIT(SP_FBITS - 2);
+	}
 
 	return r;
 }
@@ -138,7 +141,8 @@
 		} else {
 			/* sticky right shift es bits
 			 */
-			SPXSRSXn(es);
+			xm = XSPSRS(xm, es);
+			xe += es;
 			assert((xm & (SP_HIDDEN_BIT << 3)) == 0);
 			assert(xe == SP_EMIN);
 		}
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 374a3f0..8476067 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -46,25 +46,24 @@
 }
 
 /* 3bit extended single precision sticky right shift */
-#define SPXSRSXn(rs)							\
-	(xe += rs,							\
-	 xm = (rs > (SP_FBITS+3))?1:((xm) >> (rs)) | ((xm) << (32-(rs)) != 0))
+#define XSPSRS(v, rs)	/* v >> rs, OR-ing any shifted-out 1 bits into bit 0 */ \
+	(((rs) > (SP_FBITS+3)) ? 1 : (((v) >> (rs)) | (((v) << (32-(rs))) != 0)))
+
+#define XSPSRS1(m)	/* one-bit right shift that keeps the dropped bit sticky */ \
+	(((m) >> 1) | ((m) & 1))
 
 #define SPXSRSX1() \
-	(xe++, (xm = (xm >> 1) | (xm & 1)))
-
-#define SPXSRSYn(rs)								\
-	(ye+=rs,								\
-	 ym = (rs > (SP_FBITS+3))?1:((ym) >> (rs)) | ((ym) << (32-(rs)) != 0))
+	(xe++, (xm = XSPSRS1(xm)))
 
 #define SPXSRSY1() \
-	(ye++, (ym = (ym >> 1) | (ym & 1)))
+	(ye++, (ym = XSPSRS1(ym)))
 
 /* convert denormal to normalized with extended exponent */
 #define SPDNORMx(m,e) \
 	while ((m >> SP_FBITS) == 0) { m <<= 1; e--; }
 #define SPDNORMX	SPDNORMx(xm, xe)
 #define SPDNORMY	SPDNORMx(ym, ye)
+#define SPDNORMZ	SPDNORMx(zm, ze)
 
 static inline union ieee754sp buildsp(int s, int bx, unsigned m)
 {
diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c
index f1c87b0..c55c0c0 100644
--- a/arch/mips/math-emu/sp_add.c
+++ b/arch/mips/math-emu/sp_add.c
@@ -132,13 +132,15 @@
 		 * Have to shift y fraction right to align.
 		 */
 		s = xe - ye;
-		SPXSRSYn(s);
+		ym = XSPSRS(ym, s);
+		ye += s;
 	} else if (ye > xe) {
 		/*
 		 * Have to shift x fraction right to align.
 		 */
 		s = ye - xe;
-		SPXSRSXn(s);
+		xm = XSPSRS(xm, s);
+		xe += s;
 	}
 	assert(xe == ye);
 	assert(xe <= SP_EMAX);
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index dd1dd83..a8cd8b4 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -14,8 +14,12 @@
 
 #include "ieee754sp.h"
 
-union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
-				union ieee754sp y)
+enum maddf_flags {	/* option bits for the flags argument of _sp_maddf() */
+	maddf_negate_product	= 1 << 0,	/* flip the sign of x * y before it is added to z */
+};
+
+static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
+				 union ieee754sp y, enum maddf_flags flags)
 {
 	int re;
 	int rs;
@@ -32,15 +36,15 @@
 
 	COMPXSP;
 	COMPYSP;
-	u32 zm; int ze; int zs __maybe_unused; int zc;
+	COMPZSP;
 
 	EXPLODEXSP;
 	EXPLODEYSP;
-	EXPLODESP(z, zc, zs, ze, zm)
+	EXPLODEZSP;
 
 	FLUSHXSP;
 	FLUSHYSP;
-	FLUSHSP(z, zc, zs, ze, zm);
+	FLUSHZSP;
 
 	ieee754_clearcx();
 
@@ -49,7 +53,7 @@
 		ieee754_setcx(IEEE754_INVALID_OPERATION);
 		return ieee754sp_nanxcpt(z);
 	case IEEE754_CLASS_DNORM:
-		SPDNORMx(zm, ze);
+		SPDNORMZ;
 	/* QNAN is handled separately below */
 	}
 
@@ -154,6 +158,8 @@
 
 	re = xe + ye;
 	rs = xs ^ ys;
+	if (flags & maddf_negate_product)
+		rs ^= 1;
 
 	/* shunt to top of word */
 	xm <<= 32 - (SP_FBITS + 1);
@@ -208,16 +214,18 @@
 
 	if (ze > re) {
 		/*
-		 * Have to shift y fraction right to align.
+		 * Have to shift r fraction right to align.
 		 */
 		s = ze - re;
-		SPXSRSYn(s);
+		rm = XSPSRS(rm, s);
+		re += s;
 	} else if (re > ze) {
 		/*
-		 * Have to shift x fraction right to align.
+		 * Have to shift z fraction right to align.
 		 */
 		s = re - ze;
-		SPXSRSYn(s);
+		zm = XSPSRS(zm, s);
+		ze += s;
 	}
 	assert(ze == re);
 	assert(ze <= SP_EMAX);
@@ -230,7 +238,8 @@
 		zm = zm + rm;
 
 		if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
-			SPXSRSX1();
+			zm = XSPSRS1(zm);
+			ze++;
 		}
 	} else {
 		if (zm >= rm) {
@@ -253,3 +262,15 @@
 	}
 	return ieee754sp_format(zs, ze, zm);
 }
+
+union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
+				union ieee754sp y)
+{	/* multiply-add entry point: thin wrapper around the shared _sp_maddf() */
+	return _sp_maddf(z, x, y, 0);	/* flags == 0: the product keeps its sign */
+}
+
+union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
+				union ieee754sp y)
+{	/* multiply-subtract entry point: same code path as maddf, product negated */
+	return _sp_maddf(z, x, y, maddf_negate_product);	/* yields z - (x * y) */
+}
diff --git a/arch/mips/math-emu/sp_msubf.c b/arch/mips/math-emu/sp_msubf.c
deleted file mode 100644
index 81c38b980..0000000
--- a/arch/mips/math-emu/sp_msubf.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * IEEE754 floating point arithmetic
- * single precision: MSUB.f (Fused Multiply Subtract)
- * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
- *
- * MIPS floating point support
- * Copyright (C) 2015 Imagination Technologies, Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; version 2 of the License.
- */
-
-#include "ieee754sp.h"
-
-union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
-				union ieee754sp y)
-{
-	int re;
-	int rs;
-	unsigned rm;
-	unsigned short lxm;
-	unsigned short hxm;
-	unsigned short lym;
-	unsigned short hym;
-	unsigned lrm;
-	unsigned hrm;
-	unsigned t;
-	unsigned at;
-	int s;
-
-	COMPXSP;
-	COMPYSP;
-	u32 zm; int ze; int zs __maybe_unused; int zc;
-
-	EXPLODEXSP;
-	EXPLODEYSP;
-	EXPLODESP(z, zc, zs, ze, zm)
-
-	FLUSHXSP;
-	FLUSHYSP;
-	FLUSHSP(z, zc, zs, ze, zm);
-
-	ieee754_clearcx();
-
-	switch (zc) {
-	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(z);
-	case IEEE754_CLASS_DNORM:
-		SPDNORMx(zm, ze);
-	/* QNAN is handled separately below */
-	}
-
-	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
-		return ieee754sp_nanxcpt(y);
-
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		return ieee754sp_nanxcpt(x);
-
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
-		return y;
-
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
-		return x;
-
-	/*
-	 * Infinity handling
-	 */
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_indef();
-
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		return ieee754sp_inf(xs ^ ys);
-
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
-		if (zc == IEEE754_CLASS_INF)
-			return ieee754sp_inf(zs);
-		/* Multiplication is 0 so just return z */
-		return z;
-
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
-		SPDNORMX;
-
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
-			return ieee754sp_inf(zs);
-		SPDNORMY;
-		break;
-
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
-			return ieee754sp_inf(zs);
-		SPDNORMX;
-		break;
-
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
-			return ieee754sp_inf(zs);
-		/* fall through to real compuation */
-	}
-
-	/* Finally get to do some computation */
-
-	/*
-	 * Do the multiplication bit first
-	 *
-	 * rm = xm * ym, re = xe + ye basically
-	 *
-	 * At this point xm and ym should have been normalized.
-	 */
-
-	/* rm = xm * ym, re = xe+ye basically */
-	assert(xm & SP_HIDDEN_BIT);
-	assert(ym & SP_HIDDEN_BIT);
-
-	re = xe + ye;
-	rs = xs ^ ys;
-
-	/* shunt to top of word */
-	xm <<= 32 - (SP_FBITS + 1);
-	ym <<= 32 - (SP_FBITS + 1);
-
-	/*
-	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
-	 */
-	lxm = xm & 0xffff;
-	hxm = xm >> 16;
-	lym = ym & 0xffff;
-	hym = ym >> 16;
-
-	lrm = lxm * lym;	/* 16 * 16 => 32 */
-	hrm = hxm * hym;	/* 16 * 16 => 32 */
-
-	t = lxm * hym; /* 16 * 16 => 32 */
-	at = lrm + (t << 16);
-	hrm += at < lrm;
-	lrm = at;
-	hrm = hrm + (t >> 16);
-
-	t = hxm * lym; /* 16 * 16 => 32 */
-	at = lrm + (t << 16);
-	hrm += at < lrm;
-	lrm = at;
-	hrm = hrm + (t >> 16);
-
-	rm = hrm | (lrm != 0);
-
-	/*
-	 * Sticky shift down to normal rounding precision.
-	 */
-	if ((int) rm < 0) {
-		rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
-		    ((rm << (SP_FBITS + 1 + 3)) != 0);
-		re++;
-	} else {
-		rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
-		     ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
-	}
-	assert(rm & (SP_HIDDEN_BIT << 3));
-
-	/* And now the subtraction */
-
-	/* Flip sign of r and handle as add */
-	rs ^= 1;
-
-	assert(zm & SP_HIDDEN_BIT);
-
-	/*
-	 * Provide guard,round and stick bit space.
-	 */
-	zm <<= 3;
-
-	if (ze > re) {
-		/*
-		 * Have to shift y fraction right to align.
-		 */
-		s = ze - re;
-		SPXSRSYn(s);
-	} else if (re > ze) {
-		/*
-		 * Have to shift x fraction right to align.
-		 */
-		s = re - ze;
-		SPXSRSYn(s);
-	}
-	assert(ze == re);
-	assert(ze <= SP_EMAX);
-
-	if (zs == rs) {
-		/*
-		 * Generate 28 bit result of adding two 27 bit numbers
-		 * leaving result in zm, zs and ze.
-		 */
-		zm = zm + rm;
-
-		if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
-			SPXSRSX1(); /* shift preserving sticky */
-		}
-	} else {
-		if (zm >= rm) {
-			zm = zm - rm;
-		} else {
-			zm = rm - zm;
-			zs = rs;
-		}
-		if (zm == 0)
-			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
-
-		/*
-		 * Normalize in extended single precision
-		 */
-		while ((zm >> (SP_MBITS + 3)) == 0) {
-			zm <<= 1;
-			ze--;
-		}
-
-	}
-	return ieee754sp_format(zs, ze, zm);
-}
diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c
index ec5f937..dc998ed 100644
--- a/arch/mips/math-emu/sp_sub.c
+++ b/arch/mips/math-emu/sp_sub.c
@@ -134,13 +134,15 @@
 		 * have to shift y fraction right to align
 		 */
 		s = xe - ye;
-		SPXSRSYn(s);
+		ym = XSPSRS(ym, s);
+		ye += s;
 	} else if (ye > xe) {
 		/*
 		 * have to shift x fraction right to align
 		 */
 		s = ye - xe;
-		SPXSRSXn(s);
+		xm = XSPSRS(xm, s);
+		xe += s;
 	}
 	assert(xe == ye);
 	assert(xe <= SP_EMAX);
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index caac3d7..ef7f925 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -77,6 +77,7 @@
  */
 static unsigned long icache_size __read_mostly;
 static unsigned long dcache_size __read_mostly;
+static unsigned long vcache_size __read_mostly;
 static unsigned long scache_size __read_mostly;
 
 /*
@@ -447,6 +448,11 @@
 		r4k_blast_scache();
 		break;
 
+	case CPU_BMIPS5000:
+		r4k_blast_scache();
+		__sync();
+		break;
+
 	default:
 		r4k_blast_dcache();
 		r4k_blast_icache();
@@ -492,7 +498,14 @@
 	if (!(has_valid_asid(vma->vm_mm)))
 		return;
 
-	r4k_blast_dcache();
+	/*
+	 * If dcache can alias, we must blast it since mapping is changing.
+	 * If executable, we must ensure any dirty lines are written back far
+	 * enough to be visible to icache.
+	 */
+	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc))
+		r4k_blast_dcache();
+	/* If executable, blast stale lines from icache */
 	if (exec)
 		r4k_blast_icache();
 }
@@ -502,7 +515,7 @@
 {
 	int exec = vma->vm_flags & VM_EXEC;
 
-	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc))
+	if (cpu_has_dc_aliases || exec)
 		r4k_on_each_cpu(local_r4k_flush_cache_range, vma);
 }
 
@@ -1148,6 +1161,8 @@
 					  c->dcache.ways *
 					  c->dcache.linesz;
 		c->dcache.waybit = 0;
+		if ((prid & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2)
+			c->options |= MIPS_CPU_PREFETCH;
 		break;
 
 	case CPU_CAVIUM_OCTEON3:
@@ -1278,6 +1293,8 @@
 	case CPU_M5150:
 	case CPU_QEMU_GENERIC:
 	case CPU_I6400:
+	case CPU_P6600:
+	case CPU_M6250:
 		if (!(read_c0_config7() & MIPS_CONF7_IAR) &&
 		    (c->icache.waysize > PAGE_SIZE))
 			c->icache.flags |= MIPS_CACHE_ALIASES;
@@ -1304,9 +1321,16 @@
 		break;
 
 	case CPU_ALCHEMY:
+	case CPU_I6400:
 		c->icache.flags |= MIPS_CACHE_IC_F_DC;
 		break;
 
+	case CPU_BMIPS5000:
+		c->icache.flags |= MIPS_CACHE_IC_F_DC;
+		/* Cache aliases are handled in hardware; allow HIGHMEM */
+		c->dcache.flags &= ~MIPS_CACHE_ALIASES;
+		break;
+
 	case CPU_LOONGSON2:
 		/*
 		 * LOONGSON2 has 4 way icache, but when using indexed cache op,
@@ -1328,6 +1352,31 @@
 	       c->dcache.linesz);
 }
 
+static void probe_vcache(void)	/* fill in c->vcache from CP0 Config2 and log its geometry */
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config2, lsize;
+
+	if (current_cpu_type() != CPU_LOONGSON3)	/* nothing is probed on any other CPU type */
+		return;
+
+	config2 = read_c0_config2();
+	if ((lsize = ((config2 >> 20) & 15)))	/* 4-bit field at Config2 bits 23:20 */
+		c->vcache.linesz = 2 << lsize;
+	else
+		c->vcache.linesz = lsize;	/* field reads 0, so the line size is recorded as 0 */
+
+	c->vcache.sets = 64 << ((config2 >> 24) & 15);	/* 4-bit field at Config2 bits 27:24 */
+	c->vcache.ways = 1 + ((config2 >> 16) & 15);	/* 4-bit field at Config2 bits 19:16 */
+
+	vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz;	/* total size in bytes */
+
+	c->vcache.waybit = 0;
+
+	pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n",
+		vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz);
+}
+
 /*
  * If you even _breathe_ on this function, look at the gcc output and make sure
  * it does not pop things on and off the stack for the cache sizing loop that
@@ -1650,6 +1699,7 @@
 	struct cpuinfo_mips *c = &current_cpu_data;
 
 	probe_pcache();
+	probe_vcache();
 	setup_scache();
 
 	r4k_blast_dcache_page_setup();
@@ -1671,7 +1721,7 @@
 	 * This code supports virtually indexed processors and will be
 	 * unnecessarily inefficient on physically indexed processors.
 	 */
-	if (c->dcache.linesz)
+	if (c->dcache.linesz && cpu_has_dc_aliases)
 		shm_align_mask = max_t( unsigned long,
 					c->dcache.sets * c->dcache.linesz - 1,
 					PAGE_SIZE - 1);
@@ -1744,12 +1794,24 @@
 		flush_icache_range = (void *)b5k_instruction_hazard;
 		local_flush_icache_range = (void *)b5k_instruction_hazard;
 
-		/* Cache aliases are handled in hardware; allow HIGHMEM */
-		current_cpu_data.dcache.flags &= ~MIPS_CACHE_ALIASES;
 
 		/* Optimization: an L2 flush implicitly flushes the L1 */
 		current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES;
 		break;
+	case CPU_LOONGSON3:
+		/* Loongson-3 maintains cache coherency by hardware */
+		__flush_cache_all	= cache_noop;
+		__flush_cache_vmap	= cache_noop;
+		__flush_cache_vunmap	= cache_noop;
+		__flush_kernel_vmap_range = (void *)cache_noop;
+		flush_cache_mm		= (void *)cache_noop;
+		flush_cache_page	= (void *)cache_noop;
+		flush_cache_range	= (void *)cache_noop;
+		flush_cache_sigtramp	= (void *)cache_noop;
+		flush_icache_all	= (void *)cache_noop;
+		flush_data_cache_page	= (void *)cache_noop;
+		local_flush_data_cache_page	= (void *)cache_noop;
+		break;
 	}
 }
 
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 3f159ca..bf04c6c 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 
 #include <asm/cacheflush.h>
+#include <asm/highmem.h>
 #include <asm/processor.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
@@ -83,8 +84,6 @@
 	struct address_space *mapping = page_mapping(page);
 	unsigned long addr;
 
-	if (PageHighMem(page))
-		return;
 	if (mapping && !mapping_mapped(mapping)) {
 		SetPageDcacheDirty(page);
 		return;
@@ -95,8 +94,15 @@
 	 * case is for exec env/arg pages and those are %99 certainly going to
 	 * get faulted into the tlb (and thus flushed) anyways.
 	 */
-	addr = (unsigned long) page_address(page);
+	if (PageHighMem(page))
+		addr = (unsigned long)kmap_atomic(page);
+	else
+		addr = (unsigned long)page_address(page);
+
 	flush_data_cache_page(addr);
+
+	if (PageHighMem(page))
+		__kunmap_atomic((void *)addr);
 }
 
 EXPORT_SYMBOL(__flush_dcache_page);
@@ -119,33 +125,28 @@
 
 EXPORT_SYMBOL(__flush_anon_page);
 
-void __flush_icache_page(struct vm_area_struct *vma, struct page *page)
-{
-	unsigned long addr;
-
-	if (PageHighMem(page))
-		return;
-
-	addr = (unsigned long) page_address(page);
-	flush_data_cache_page(addr);
-}
-EXPORT_SYMBOL_GPL(__flush_icache_page);
-
-void __update_cache(struct vm_area_struct *vma, unsigned long address,
-	pte_t pte)
+void __update_cache(unsigned long address, pte_t pte)
 {
 	struct page *page;
 	unsigned long pfn, addr;
-	int exec = (vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc;
+	int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc;
 
 	pfn = pte_pfn(pte);
 	if (unlikely(!pfn_valid(pfn)))
 		return;
 	page = pfn_to_page(pfn);
-	if (page_mapping(page) && Page_dcache_dirty(page)) {
-		addr = (unsigned long) page_address(page);
+	if (Page_dcache_dirty(page)) {
+		if (PageHighMem(page))
+			addr = (unsigned long)kmap_atomic(page);
+		else
+			addr = (unsigned long)page_address(page);
+
 		if (exec || pages_do_alias(addr, address & PAGE_MASK))
 			flush_data_cache_page(addr);
+
+		if (PageHighMem(page))
+			__kunmap_atomic((void *)addr);
+
 		ClearPageDcacheDirty(page);
 	}
 }
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index 730d394..cb557d2 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -88,19 +88,20 @@
 	else
 #endif
 #if defined(CONFIG_ZONE_DMA32) && defined(CONFIG_ZONE_DMA)
-	     if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
+	     if (dev == NULL || dev->coherent_dma_mask < DMA_BIT_MASK(32))
 			dma_flag = __GFP_DMA;
 	else if (dev->coherent_dma_mask < DMA_BIT_MASK(64))
 			dma_flag = __GFP_DMA32;
 	else
 #endif
 #if defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_ZONE_DMA)
-	     if (dev->coherent_dma_mask < DMA_BIT_MASK(64))
+	     if (dev == NULL || dev->coherent_dma_mask < DMA_BIT_MASK(64))
 		dma_flag = __GFP_DMA32;
 	else
 #endif
 #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32)
-	     if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8))
+	     if (dev == NULL ||
+		 dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8))
 		dma_flag = __GFP_DMA;
 	else
 #endif
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 7e5fa09..9b58eb5 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -98,8 +98,10 @@
 	idx += in_interrupt() ? FIX_N_COLOURS : 0;
 	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
 	pte = mk_pte(page, prot);
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+#if defined(CONFIG_XPA)
 	entrylo = pte_to_entrylo(pte.pte_high);
+#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+	entrylo = pte.pte_high;
 #else
 	entrylo = pte_to_entrylo(pte_val(pte));
 #endif
@@ -110,9 +112,11 @@
 	write_c0_entrylo0(entrylo);
 	write_c0_entrylo1(entrylo);
 #ifdef CONFIG_XPA
-	entrylo = (pte.pte_low & _PFNX_MASK);
-	writex_c0_entrylo0(entrylo);
-	writex_c0_entrylo1(entrylo);
+	if (cpu_has_xpa) {
+		entrylo = (pte.pte_low & _PFNX_MASK);
+		writex_c0_entrylo0(entrylo);
+		writex_c0_entrylo1(entrylo);
+	}
 #endif
 	tlbidx = read_c0_wired();
 	write_c0_wired(tlbidx + 1);
@@ -196,7 +200,7 @@
 		if (cpu_has_dc_aliases)
 			SetPageDcacheDirty(page);
 	}
-	if ((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc)
+	if (vma->vm_flags & VM_EXEC)
 		flush_cache_page(vma, vaddr, page_to_pfn(page));
 }
 
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 885d73f..c41953c 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -188,6 +188,15 @@
 			}
 			break;
 
+		case CPU_LOONGSON3:
+			/* Loongson-3 only supports Pref_Load/Pref_Store. */
+			pref_bias_clear_store = 128;
+			pref_bias_copy_load = 128;
+			pref_bias_copy_store = 128;
+			pref_src_mode = Pref_Load;
+			pref_dst_mode = Pref_Store;
+			break;
+
 		default:
 			pref_bias_clear_store = 128;
 			pref_bias_copy_load = 256;
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 91dec32..286a4d5 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -141,6 +141,7 @@
 	case CPU_P5600:
 	case CPU_BMIPS5000:
 	case CPU_QEMU_GENERIC:
+	case CPU_P6600:
 		if (config2 & (1 << 12))
 			return 0;
 	}
diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
index b4f366f..1290b99 100644
--- a/arch/mips/mm/tlb-r3k.c
+++ b/arch/mips/mm/tlb-r3k.c
@@ -43,7 +43,7 @@
 {
 	unsigned long old_ctx;
 
-	old_ctx = read_c0_entryhi() & ASID_MASK;
+	old_ctx = read_c0_entryhi() & cpu_asid_mask(&current_cpu_data);
 	write_c0_entrylo0(0);
 	while (entry < current_cpu_data.tlbsize) {
 		write_c0_index(entry << 8);
@@ -81,6 +81,7 @@
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
 {
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
 	struct mm_struct *mm = vma->vm_mm;
 	int cpu = smp_processor_id();
 
@@ -89,13 +90,13 @@
 
 #ifdef DEBUG_TLB
 		printk("[tlbrange<%lu,0x%08lx,0x%08lx>]",
-			cpu_context(cpu, mm) & ASID_MASK, start, end);
+			cpu_context(cpu, mm) & asid_mask, start, end);
 #endif
 		local_irq_save(flags);
 		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 		if (size <= current_cpu_data.tlbsize) {
-			int oldpid = read_c0_entryhi() & ASID_MASK;
-			int newpid = cpu_context(cpu, mm) & ASID_MASK;
+			int oldpid = read_c0_entryhi() & asid_mask;
+			int newpid = cpu_context(cpu, mm) & asid_mask;
 
 			start &= PAGE_MASK;
 			end += PAGE_SIZE - 1;
@@ -159,6 +160,7 @@
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
 	int cpu = smp_processor_id();
 
 	if (cpu_context(cpu, vma->vm_mm) != 0) {
@@ -168,10 +170,10 @@
 #ifdef DEBUG_TLB
 		printk("[tlbpage<%lu,0x%08lx>]", cpu_context(cpu, vma->vm_mm), page);
 #endif
-		newpid = cpu_context(cpu, vma->vm_mm) & ASID_MASK;
+		newpid = cpu_context(cpu, vma->vm_mm) & asid_mask;
 		page &= PAGE_MASK;
 		local_irq_save(flags);
-		oldpid = read_c0_entryhi() & ASID_MASK;
+		oldpid = read_c0_entryhi() & asid_mask;
 		write_c0_entryhi(page | newpid);
 		BARRIER;
 		tlb_probe();
@@ -190,6 +192,7 @@
 
 void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
 	unsigned long flags;
 	int idx, pid;
 
@@ -199,10 +202,10 @@
 	if (current->active_mm != vma->vm_mm)
 		return;
 
-	pid = read_c0_entryhi() & ASID_MASK;
+	pid = read_c0_entryhi() & asid_mask;
 
 #ifdef DEBUG_TLB
-	if ((pid != (cpu_context(cpu, vma->vm_mm) & ASID_MASK)) || (cpu_context(cpu, vma->vm_mm) == 0)) {
+	if ((pid != (cpu_context(cpu, vma->vm_mm) & asid_mask)) || (cpu_context(cpu, vma->vm_mm) == 0)) {
 		printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%lu tlbpid=%d\n",
 		       (cpu_context(cpu, vma->vm_mm)), pid);
 	}
@@ -228,6 +231,7 @@
 void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
 		     unsigned long entryhi, unsigned long pagemask)
 {
+	unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
 	unsigned long flags;
 	unsigned long old_ctx;
 	static unsigned long wired = 0;
@@ -243,7 +247,7 @@
 
 		local_irq_save(flags);
 		/* Save old context and create impossible VPN2 value */
-		old_ctx = read_c0_entryhi() & ASID_MASK;
+		old_ctx = read_c0_entryhi() & asid_mask;
 		old_pagemask = read_c0_pagemask();
 		w = read_c0_wired();
 		write_c0_wired(w + 1);
@@ -266,7 +270,7 @@
 #endif
 
 		local_irq_save(flags);
-		old_ctx = read_c0_entryhi() & ASID_MASK;
+		old_ctx = read_c0_entryhi() & asid_mask;
 		write_c0_entrylo0(entrylo0);
 		write_c0_entryhi(entryhi);
 		write_c0_index(wired);
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 2d93b63..e8b335c 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -28,25 +28,28 @@
 extern void build_tlb_refill_handler(void);
 
 /*
- * LOONGSON2/3 has a 4 entry itlb which is a subset of dtlb,
- * unfortunately, itlb is not totally transparent to software.
+ * LOONGSON-2 has a 4 entry itlb which is a subset of jtlb, LOONGSON-3 has
+ * a 4 entry itlb and a 4 entry dtlb which are subsets of jtlb. Unfortunately,
+ * itlb/dtlb are not totally transparent to software.
  */
-static inline void flush_itlb(void)
+static inline void flush_micro_tlb(void)
 {
 	switch (current_cpu_type()) {
 	case CPU_LOONGSON2:
+		write_c0_diag(LOONGSON_DIAG_ITLB);
+		break;
 	case CPU_LOONGSON3:
-		write_c0_diag(4);
+		write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB);
 		break;
 	default:
 		break;
 	}
 }
 
-static inline void flush_itlb_vm(struct vm_area_struct *vma)
+static inline void flush_micro_tlb_vm(struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & VM_EXEC)
-		flush_itlb();
+		flush_micro_tlb();
 }
 
 void local_flush_tlb_all(void)
@@ -93,7 +96,7 @@
 	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
 	htw_start();
-	flush_itlb();
+	flush_micro_tlb();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(local_flush_tlb_all);
@@ -159,7 +162,7 @@
 		} else {
 			drop_mmu_context(mm, cpu);
 		}
-		flush_itlb();
+		flush_micro_tlb();
 		local_irq_restore(flags);
 	}
 }
@@ -205,7 +208,7 @@
 	} else {
 		local_flush_tlb_all();
 	}
-	flush_itlb();
+	flush_micro_tlb();
 	local_irq_restore(flags);
 }
 
@@ -240,7 +243,7 @@
 	finish:
 		write_c0_entryhi(oldpid);
 		htw_start();
-		flush_itlb_vm(vma);
+		flush_micro_tlb_vm(vma);
 		local_irq_restore(flags);
 	}
 }
@@ -274,7 +277,7 @@
 	}
 	write_c0_entryhi(oldpid);
 	htw_start();
-	flush_itlb();
+	flush_micro_tlb();
 	local_irq_restore(flags);
 }
 
@@ -301,7 +304,7 @@
 	local_irq_save(flags);
 
 	htw_stop();
-	pid = read_c0_entryhi() & ASID_MASK;
+	pid = read_c0_entryhi() & cpu_asid_mask(&current_cpu_data);
 	address &= (PAGE_MASK << 1);
 	write_c0_entryhi(address | pid);
 	pgdp = pgd_offset(vma->vm_mm, address);
@@ -336,10 +339,12 @@
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 #ifdef CONFIG_XPA
 		write_c0_entrylo0(pte_to_entrylo(ptep->pte_high));
-		writex_c0_entrylo0(ptep->pte_low & _PFNX_MASK);
+		if (cpu_has_xpa)
+			writex_c0_entrylo0(ptep->pte_low & _PFNX_MASK);
 		ptep++;
 		write_c0_entrylo1(pte_to_entrylo(ptep->pte_high));
-		writex_c0_entrylo1(ptep->pte_low & _PFNX_MASK);
+		if (cpu_has_xpa)
+			writex_c0_entrylo1(ptep->pte_low & _PFNX_MASK);
 #else
 		write_c0_entrylo0(ptep->pte_high);
 		ptep++;
@@ -357,7 +362,7 @@
 	}
 	tlbw_use_hazard();
 	htw_start();
-	flush_itlb_vm(vma);
+	flush_micro_tlb_vm(vma);
 	local_irq_restore(flags);
 }
 
diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c
index 138a2ec..e86e2e5 100644
--- a/arch/mips/mm/tlb-r8k.c
+++ b/arch/mips/mm/tlb-r8k.c
@@ -194,7 +194,7 @@
 	if (current->active_mm != vma->vm_mm)
 		return;
 
-	pid = read_c0_entryhi() & ASID_MASK;
+	pid = read_c0_entryhi() & cpu_asid_mask(&current_cpu_data);
 
 	local_irq_save(flags);
 	address &= PAGE_MASK;
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 84c6e3f..274da90 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -234,20 +234,16 @@
 	pr_debug("\n");
 
 	pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT);
-	pr_define("_PAGE_READ_SHIFT %d\n", _PAGE_READ_SHIFT);
+	pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT);
 	pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT);
 	pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT);
 	pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT);
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 	pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
 #endif
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
-	if (cpu_has_rixi) {
 #ifdef _PAGE_NO_EXEC_SHIFT
+	if (cpu_has_rixi)
 		pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
-		pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT);
-#endif
-	}
 #endif
 	pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT);
 	pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT);
@@ -284,7 +280,12 @@
 #define C0_ENTRYLO1	3, 0
 #define C0_CONTEXT	4, 0
 #define C0_PAGEMASK	5, 0
+#define C0_PWBASE	5, 5
+#define C0_PWFIELD	5, 6
+#define C0_PWSIZE	5, 7
+#define C0_PWCTL	6, 6
 #define C0_BADVADDR	8, 0
+#define C0_PGD		9, 7
 #define C0_ENTRYHI	10, 0
 #define C0_EPC		14, 0
 #define C0_XCONTEXT	20, 0
@@ -630,6 +631,11 @@
 static __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
 							unsigned int reg)
 {
+	if (_PAGE_GLOBAL_SHIFT == 0) {
+		/* pte_t is already in EntryLo format */
+		return;
+	}
+
 	if (cpu_has_rixi && _PAGE_NO_EXEC) {
 		if (fill_includes_sw_bits) {
 			UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL));
@@ -808,7 +814,10 @@
 
 	if (pgd_reg != -1) {
 		/* pgd is in pgd_reg */
-		UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
+		if (cpu_has_ldpte)
+			UASM_i_MFC0(p, ptr, C0_PWBASE);
+		else
+			UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
 	} else {
 #if defined(CONFIG_MIPS_PGD_C0_CONTEXT)
 		/*
@@ -1007,39 +1016,40 @@
 
 static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
 {
-	/*
-	 * 64bit address support (36bit on a 32bit CPU) in a 32bit
-	 * Kernel is a special case. Only a few CPUs use it.
-	 */
-	if (config_enabled(CONFIG_PHYS_ADDR_T_64BIT) && !cpu_has_64bits) {
-		int pte_off_even = sizeof(pte_t) / 2;
-		int pte_off_odd = pte_off_even + sizeof(pte_t);
-#ifdef CONFIG_XPA
-		const int scratch = 1; /* Our extra working register */
+	int pte_off_even = 0;
+	int pte_off_odd = sizeof(pte_t);
 
-		uasm_i_addu(p, scratch, 0, ptep);
+#if defined(CONFIG_CPU_MIPS32) && defined(CONFIG_PHYS_ADDR_T_64BIT)
+	/* The low 32 bits of EntryLo are stored in pte_high */
+	pte_off_even += offsetof(pte_t, pte_high);
+	pte_off_odd += offsetof(pte_t, pte_high);
 #endif
+
+	if (config_enabled(CONFIG_XPA)) {
 		uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */
-		uasm_i_lw(p, ptep, pte_off_odd, ptep); /* odd pte */
 		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
-		UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL));
 		UASM_i_MTC0(p, tmp, C0_ENTRYLO0);
-		UASM_i_MTC0(p, ptep, C0_ENTRYLO1);
-#ifdef CONFIG_XPA
-		uasm_i_lw(p, tmp, 0, scratch);
-		uasm_i_lw(p, ptep, sizeof(pte_t), scratch);
-		uasm_i_lui(p, scratch, 0xff);
-		uasm_i_ori(p, scratch, scratch, 0xffff);
-		uasm_i_and(p, tmp, scratch, tmp);
-		uasm_i_and(p, ptep, scratch, ptep);
-		uasm_i_mthc0(p, tmp, C0_ENTRYLO0);
-		uasm_i_mthc0(p, ptep, C0_ENTRYLO1);
-#endif
+
+		if (cpu_has_xpa && !mips_xpa_disabled) {
+			uasm_i_lw(p, tmp, 0, ptep);
+			uasm_i_ext(p, tmp, tmp, 0, 24);
+			uasm_i_mthc0(p, tmp, C0_ENTRYLO0);
+		}
+
+		uasm_i_lw(p, tmp, pte_off_odd, ptep); /* odd pte */
+		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
+		UASM_i_MTC0(p, tmp, C0_ENTRYLO1);
+
+		if (cpu_has_xpa && !mips_xpa_disabled) {
+			uasm_i_lw(p, tmp, sizeof(pte_t), ptep);
+			uasm_i_ext(p, tmp, tmp, 0, 24);
+			uasm_i_mthc0(p, tmp, C0_ENTRYLO1);
+		}
 		return;
 	}
 
-	UASM_i_LW(p, tmp, 0, ptep); /* get even pte */
-	UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
+	UASM_i_LW(p, tmp, pte_off_even, ptep); /* get even pte */
+	UASM_i_LW(p, ptep, pte_off_odd, ptep); /* get odd pte */
 	if (r45k_bvahwbug())
 		build_tlb_probe_entry(p);
 	build_convert_pte_to_entrylo(p, tmp);
@@ -1421,6 +1431,108 @@
 	dump_handler("r4000_tlb_refill", (u32 *)ebase, 64);
 }
 
+static void setup_pw(void)
+{
+	unsigned long pgd_i, pgd_w;
+#ifndef __PAGETABLE_PMD_FOLDED
+	unsigned long pmd_i, pmd_w;
+#endif
+	unsigned long pt_i, pt_w;
+	unsigned long pte_i, pte_w;
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	unsigned long psn;
+
+	psn = ilog2(_PAGE_HUGE);     /* bit used to indicate huge page */
+#endif
+	pgd_i = PGDIR_SHIFT;  /* 1st level PGD */
+#ifndef __PAGETABLE_PMD_FOLDED
+	pgd_w = PGDIR_SHIFT - PMD_SHIFT + PGD_ORDER;
+
+	pmd_i = PMD_SHIFT;    /* 2nd level PMD */
+	pmd_w = PMD_SHIFT - PAGE_SHIFT;
+#else
+	pgd_w = PGDIR_SHIFT - PAGE_SHIFT + PGD_ORDER;
+#endif
+
+	pt_i  = PAGE_SHIFT;    /* 3rd level PTE */
+	pt_w  = PAGE_SHIFT - 3;
+
+	pte_i = ilog2(_PAGE_GLOBAL);
+	pte_w = 0;
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	write_c0_pwfield(pgd_i << 24 | pmd_i << 12 | pt_i << 6 | pte_i);
+	write_c0_pwsize(1 << 30 | pgd_w << 24 | pmd_w << 12 | pt_w << 6 | pte_w);
+#else
+	write_c0_pwfield(pgd_i << 24 | pt_i << 6 | pte_i);
+	write_c0_pwsize(1 << 30 | pgd_w << 24 | pt_w << 6 | pte_w);
+#endif
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+	write_c0_pwctl(1 << 6 | psn);
+#endif
+	write_c0_kpgd(swapper_pg_dir);
+	kscratch_used_mask |= (1 << 7); /* KScratch6 is used for KPGD */
+}
+
+static void build_loongson3_tlb_refill_handler(void)
+{
+	u32 *p = tlb_handler;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+	memset(tlb_handler, 0, sizeof(tlb_handler));
+
+	if (check_for_high_segbits) {
+		uasm_i_dmfc0(&p, K0, C0_BADVADDR);
+		uasm_i_dsrl_safe(&p, K1, K0, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
+		uasm_il_beqz(&p, &r, K1, label_vmalloc);
+		uasm_i_nop(&p);
+
+		uasm_il_bgez(&p, &r, K0, label_large_segbits_fault);
+		uasm_i_nop(&p);
+		uasm_l_vmalloc(&l, p);
+	}
+
+	uasm_i_dmfc0(&p, K1, C0_PGD);
+
+	uasm_i_lddir(&p, K0, K1, 3);  /* global page dir */
+#ifndef __PAGETABLE_PMD_FOLDED
+	uasm_i_lddir(&p, K1, K0, 1);  /* middle page dir */
+#endif
+	uasm_i_ldpte(&p, K1, 0);      /* even */
+	uasm_i_ldpte(&p, K1, 1);      /* odd */
+	uasm_i_tlbwr(&p);
+
+	/* restore page mask */
+	if (PM_DEFAULT_MASK >> 16) {
+		uasm_i_lui(&p, K0, PM_DEFAULT_MASK >> 16);
+		uasm_i_ori(&p, K0, K0, PM_DEFAULT_MASK & 0xffff);
+		uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+	} else if (PM_DEFAULT_MASK) {
+		uasm_i_ori(&p, K0, 0, PM_DEFAULT_MASK);
+		uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+	} else {
+		uasm_i_mtc0(&p, 0, C0_PAGEMASK);
+	}
+
+	uasm_i_eret(&p);
+
+	if (check_for_high_segbits) {
+		uasm_l_large_segbits_fault(&l, p);
+		UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
+		uasm_i_jr(&p, K1);
+		uasm_i_nop(&p);
+	}
+
+	uasm_resolve_relocs(relocs, labels);
+	memcpy((void *)(ebase + 0x80), tlb_handler, 0x80);
+	local_flush_icache_range(ebase + 0x80, ebase + 0x100);
+	dump_handler("loongson3_tlb_refill", (u32 *)(ebase + 0x80), 32);
+}
+
 extern u32 handle_tlbl[], handle_tlbl_end[];
 extern u32 handle_tlbs[], handle_tlbs_end[];
 extern u32 handle_tlbm[], handle_tlbm_end[];
@@ -1468,7 +1580,10 @@
 	} else {
 		/* PGD in c0_KScratch */
 		uasm_i_jr(&p, 31);
-		UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
+		if (cpu_has_ldpte)
+			UASM_i_MTC0(&p, a0, C0_PWBASE);
+		else
+			UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
 	}
 #else
 #ifdef CONFIG_SMP
@@ -1523,19 +1638,19 @@
 
 static void
 iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
-	unsigned int mode)
+	unsigned int mode, unsigned int scratch)
 {
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
 	unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY);
+	unsigned int swmode = mode & ~hwmode;
 
-	if (!cpu_has_64bits) {
-		const int scratch = 1; /* Our extra working register */
-
-		uasm_i_lui(p, scratch, (mode >> 16));
+	if (config_enabled(CONFIG_XPA) && !cpu_has_64bits) {
+		uasm_i_lui(p, scratch, swmode >> 16);
 		uasm_i_or(p, pte, pte, scratch);
-	} else
-#endif
-	uasm_i_ori(p, pte, pte, mode);
+		BUG_ON(swmode & 0xffff);
+	} else {
+		uasm_i_ori(p, pte, pte, mode);
+	}
+
 #ifdef CONFIG_SMP
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
 	if (cpu_has_64bits)
@@ -1554,6 +1669,7 @@
 		/* no uasm_i_nop needed */
 		uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr);
 		uasm_i_ori(p, pte, pte, hwmode);
+		BUG_ON(hwmode & ~0xffff);
 		uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr);
 		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
 		/* no uasm_i_nop needed */
@@ -1575,6 +1691,7 @@
 	if (!cpu_has_64bits) {
 		uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr);
 		uasm_i_ori(p, pte, pte, hwmode);
+		BUG_ON(hwmode & ~0xffff);
 		uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr);
 		uasm_i_lw(p, pte, 0, ptr);
 	}
@@ -1615,9 +1732,8 @@
 			cur = t;
 		}
 		uasm_i_andi(p, t, cur,
-			(_PAGE_PRESENT | _PAGE_READ) >> _PAGE_PRESENT_SHIFT);
-		uasm_i_xori(p, t, t,
-			(_PAGE_PRESENT | _PAGE_READ) >> _PAGE_PRESENT_SHIFT);
+			(_PAGE_PRESENT | _PAGE_NO_READ) >> _PAGE_PRESENT_SHIFT);
+		uasm_i_xori(p, t, t, _PAGE_PRESENT >> _PAGE_PRESENT_SHIFT);
 		uasm_il_bnez(p, r, t, lid);
 		if (pte == t)
 			/* You lose the SMP race :-(*/
@@ -1628,11 +1744,11 @@
 /* Make PTE valid, store result in PTR. */
 static void
 build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte,
-		 unsigned int ptr)
+		 unsigned int ptr, unsigned int scratch)
 {
 	unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED;
 
-	iPTE_SW(p, r, pte, ptr, mode);
+	iPTE_SW(p, r, pte, ptr, mode, scratch);
 }
 
 /*
@@ -1668,12 +1784,12 @@
  */
 static void
 build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte,
-		 unsigned int ptr)
+		 unsigned int ptr, unsigned int scratch)
 {
 	unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID
 			     | _PAGE_DIRTY);
 
-	iPTE_SW(p, r, pte, ptr, mode);
+	iPTE_SW(p, r, pte, ptr, mode, scratch);
 }
 
 /*
@@ -1778,7 +1894,7 @@
 	build_r3000_tlbchange_handler_head(&p, K0, K1);
 	build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl);
 	uasm_i_nop(&p); /* load delay */
-	build_make_valid(&p, &r, K0, K1);
+	build_make_valid(&p, &r, K0, K1, -1);
 	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
 
 	uasm_l_nopage_tlbl(&l, p);
@@ -1809,7 +1925,7 @@
 	build_r3000_tlbchange_handler_head(&p, K0, K1);
 	build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs);
 	uasm_i_nop(&p); /* load delay */
-	build_make_write(&p, &r, K0, K1);
+	build_make_write(&p, &r, K0, K1, -1);
 	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
 
 	uasm_l_nopage_tlbs(&l, p);
@@ -1840,7 +1956,7 @@
 	build_r3000_tlbchange_handler_head(&p, K0, K1);
 	build_pte_modifiable(&p, &r, K0, K1,  -1, label_nopage_tlbm);
 	uasm_i_nop(&p); /* load delay */
-	build_make_write(&p, &r, K0, K1);
+	build_make_write(&p, &r, K0, K1, -1);
 	build_r3000_pte_reload_tlbwi(&p, K0, K1);
 
 	uasm_l_nopage_tlbm(&l, p);
@@ -2008,7 +2124,7 @@
 		}
 		uasm_l_tlbl_goaround1(&l, p);
 	}
-	build_make_valid(&p, &r, wr.r1, wr.r2);
+	build_make_valid(&p, &r, wr.r1, wr.r2, wr.r3);
 	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
 
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
@@ -2122,7 +2238,7 @@
 	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
 	if (m4kc_tlbp_war())
 		build_tlb_probe_entry(&p);
-	build_make_write(&p, &r, wr.r1, wr.r2);
+	build_make_write(&p, &r, wr.r1, wr.r2, wr.r3);
 	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
 
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
@@ -2178,7 +2294,7 @@
 	if (m4kc_tlbp_war())
 		build_tlb_probe_entry(&p);
 	/* Present and writable bits set, set accessed and dirty bits. */
-	build_make_write(&p, &r, wr.r1, wr.r2);
+	build_make_write(&p, &r, wr.r1, wr.r2, wr.r3);
 	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
 
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
@@ -2311,9 +2427,7 @@
 	if (CONFIG_PGTABLE_LEVELS >= 3)
 		pwsize |= ilog2(PTRS_PER_PMD) << MIPS_PWSIZE_MDW_SHIFT;
 
-	/* If XPA has been enabled, PTEs are 64-bit in size. */
-	if (config_enabled(CONFIG_64BITS) || (read_c0_pagegrain() & PG_ELPA))
-		pwsize |= 1;
+	pwsize |= ilog2(sizeof(pte_t)/4) << MIPS_PWSIZE_PTEW_SHIFT;
 
 	write_c0_pwsize(pwsize);
 
@@ -2394,6 +2508,9 @@
 	 */
 	static int run_once = 0;
 
+	if (config_enabled(CONFIG_XPA) && !cpu_has_rixi)
+		panic("Kernels supporting XPA currently require CPUs with RIXI");
+
 	output_pgtable_bits_defines();
 	check_pabits();
 
@@ -2437,13 +2554,18 @@
 		break;
 
 	default:
+		if (cpu_has_ldpte)
+			setup_pw();
+
 		if (!run_once) {
 			scratch_reg = allocate_kscratch();
 			build_setup_pgd();
 			build_r4000_tlb_load_handler();
 			build_r4000_tlb_store_handler();
 			build_r4000_tlb_modify_handler();
-			if (!cpu_has_local_ebase)
+			if (cpu_has_ldpte)
+				build_loongson3_tlb_refill_handler();
+			else if (!cpu_has_local_ebase)
 				build_r4000_tlb_refill_handler();
 			flush_tlb_handlers();
 			run_once++;
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index b4a83789..9c2220a 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -153,6 +153,8 @@
 	{ insn_xori,  M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM },
 	{ insn_xor,  M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD },
 	{ insn_yield, M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD },
+	{ insn_ldpte, M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD },
+	{ insn_lddir, M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD },
 	{ insn_invalid, 0, 0 }
 };
 
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 319051c..ad718de 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -60,6 +60,7 @@
 	insn_sltiu, insn_sltu, insn_sra, insn_srl, insn_srlv, insn_subu,
 	insn_sw, insn_sync, insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi,
 	insn_tlbwr, insn_wait, insn_wsbh, insn_xor, insn_xori, insn_yield,
+	insn_lddir, insn_ldpte,
 };
 
 struct insn {
@@ -335,6 +336,8 @@
 I_u1u2s3(_bbit1);
 I_u3u1u2(_lwx)
 I_u3u1u2(_ldx)
+I_u1u2(_ldpte)
+I_u2u1u3(_lddir)
 
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 #include <asm/octeon/octeon.h>
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index 4740c82..33d5ff5 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -248,10 +248,15 @@
 #endif
 }
 
+void __init *plat_get_fdt(void)
+{
+	return (void *)__dtb_start;
+}
+
 void __init plat_mem_setup(void)
 {
 	unsigned int i;
-	void *fdt = __dtb_start;
+	void *fdt = plat_get_fdt();
 
 	fdt = malta_dt_shim(fdt);
 	__dt_setup_arch(fdt);
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index b7bf721..7407da0 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -21,6 +21,7 @@
 #include <linux/i8253.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
+#include <linux/math64.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
@@ -72,6 +73,8 @@
 {
 	unsigned long flags;
 	unsigned int count, start;
+	unsigned char secs1, secs2, ctrl;
+	int secs;
 	cycle_t giccount = 0, gicstart = 0;
 
 #if defined(CONFIG_KVM_GUEST) && CONFIG_KVM_GUEST_TIMER_FREQ
@@ -81,32 +84,51 @@
 
 	local_irq_save(flags);
 
-	/* Start counter exactly on falling edge of update flag. */
-	while (CMOS_READ(RTC_REG_A) & RTC_UIP);
-	while (!(CMOS_READ(RTC_REG_A) & RTC_UIP));
-
-	/* Initialize counters. */
-	start = read_c0_count();
-	if (gic_present) {
+	if (gic_present)
 		gic_start_count();
-		gicstart = gic_read_count();
-	}
 
-	/* Read counter exactly on falling edge of update flag. */
+	/*
+	 * Read counters exactly on rising edge of update flag.
+	 * This helps get an accurate reading under virtualisation.
+	 */
 	while (CMOS_READ(RTC_REG_A) & RTC_UIP);
 	while (!(CMOS_READ(RTC_REG_A) & RTC_UIP));
+	start = read_c0_count();
+	if (gic_present)
+		gicstart = gic_read_count();
 
+	/* Wait for falling edge before reading RTC. */
+	while (CMOS_READ(RTC_REG_A) & RTC_UIP);
+	secs1 = CMOS_READ(RTC_SECONDS);
+
+	/* Read counters again exactly on rising edge of update flag. */
+	while (!(CMOS_READ(RTC_REG_A) & RTC_UIP));
 	count = read_c0_count();
 	if (gic_present)
 		giccount = gic_read_count();
 
+	/* Wait for falling edge before reading RTC again. */
+	while (CMOS_READ(RTC_REG_A) & RTC_UIP);
+	secs2 = CMOS_READ(RTC_SECONDS);
+
+	ctrl = CMOS_READ(RTC_CONTROL);
+
 	local_irq_restore(flags);
 
+	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+		secs1 = bcd2bin(secs1);
+		secs2 = bcd2bin(secs2);
+	}
+	secs = secs2 - secs1;
+	if (secs < 1)
+		secs += 60;
+
 	count -= start;
+	count /= secs;
 	mips_hpt_frequency = count;
 
 	if (gic_present) {
-		giccount -= gicstart;
+		giccount = div_u64(giccount - gicstart, secs);
 		gic_frequency = giccount;
 	}
 }
diff --git a/arch/mips/mti-sead3/sead3-setup.c b/arch/mips/mti-sead3/sead3-setup.c
index e43f480..9f2f9b2 100644
--- a/arch/mips/mti-sead3/sead3-setup.c
+++ b/arch/mips/mti-sead3/sead3-setup.c
@@ -83,6 +83,11 @@
 	}
 }
 
+void __init *plat_get_fdt(void)
+{
+	return (void *)__dtb_start;
+}
+
 void __init plat_mem_setup(void)
 {
 	/* allow command line/bootloader env to override memory size in DT */
diff --git a/arch/mips/netlogic/common/reset.S b/arch/mips/netlogic/common/reset.S
index edbab9b..c474981 100644
--- a/arch/mips/netlogic/common/reset.S
+++ b/arch/mips/netlogic/common/reset.S
@@ -50,7 +50,6 @@
 #include <asm/netlogic/xlp-hal/sys.h>
 #include <asm/netlogic/xlp-hal/cpucontrol.h>
 
-#define CP0_EBASE	$15
 #define SYS_CPU_COHERENT_BASE	CKSEG1ADDR(XLP_DEFAULT_IO_BASE) + \
 			XLP_IO_SYS_OFFSET(0) + XLP_IO_PCI_HDRSZ + \
 			SYS_CPU_NONCOHERENT_MODE * 4
@@ -92,7 +91,7 @@
  * registers. On XLPII CPUs, usual cache instructions work.
  */
 .macro	xlp_flush_l1_dcache
-	mfc0	t0, CP0_EBASE, 0
+	mfc0	t0, CP0_PRID
 	andi	t0, t0, PRID_IMP_MASK
 	slt	t1, t0, 0x1200
 	beqz	t1, 15f
@@ -171,7 +170,7 @@
 	nop
 
 1:	/* Entry point on core wakeup */
-	mfc0	t0, CP0_EBASE, 0	/* processor ID */
+	mfc0	t0, CP0_PRID		/* processor ID */
 	andi	t0, PRID_IMP_MASK
 	li	t1, 0x1500		/* XLP 9xx */
 	beq	t0, t1, 2f		/* does not need to set coherent */
@@ -182,8 +181,8 @@
 	nop
 
 	/* set bit in SYS coherent register for the core */
-	mfc0	t0, CP0_EBASE, 1
-	mfc0	t1, CP0_EBASE, 1
+	mfc0	t0, CP0_EBASE
+	mfc0	t1, CP0_EBASE
 	srl	t1, 5
 	andi	t1, 0x3			/* t1 <- node */
 	li	t2, 0x40000
@@ -232,7 +231,7 @@
 
 	 * NOTE: All GPR contents are lost after the mtcr above!
 	 */
-	mfc0	v0, CP0_EBASE, 1
+	mfc0	v0, CP0_EBASE
 	andi	v0, 0x3ff		/* v0 <- node/core */
 
 	/*
diff --git a/arch/mips/netlogic/common/smpboot.S b/arch/mips/netlogic/common/smpboot.S
index 805355b..f0cc4c9 100644
--- a/arch/mips/netlogic/common/smpboot.S
+++ b/arch/mips/netlogic/common/smpboot.S
@@ -48,8 +48,6 @@
 #include <asm/netlogic/xlp-hal/sys.h>
 #include <asm/netlogic/xlp-hal/cpucontrol.h>
 
-#define CP0_EBASE	$15
-
 	.set	noreorder
 	.set	noat
 	.set	arch=xlr		/* for mfcr/mtcr, XLR is sufficient */
@@ -86,7 +84,7 @@
 	PTR_L	gp, 0(t1)
 
 	/* a0 has the processor id */
-	mfc0	a0, CP0_EBASE, 1
+	mfc0	a0, CP0_EBASE
 	andi	a0, 0x3ff		/* a0 <- node/core */
 	PTR_LA	t0, nlm_early_init_secondary
 	jalr	t0
diff --git a/arch/mips/netlogic/xlp/nlm_hal.c b/arch/mips/netlogic/xlp/nlm_hal.c
index 80ec929..25ee694 100644
--- a/arch/mips/netlogic/xlp/nlm_hal.c
+++ b/arch/mips/netlogic/xlp/nlm_hal.c
@@ -58,7 +58,7 @@
 		nodep->coremask = 1;	/* node 0, boot cpu */
 	nodep->sysbase = nlm_get_sys_regbase(node);
 	nodep->picbase = nlm_get_pic_regbase(node);
-	nodep->ebase = read_c0_ebase() & (~((1 << 12) - 1));
+	nodep->ebase = read_c0_ebase() & MIPS_EBASE_BASE;
 	if (cpu_is_xlp9xx())
 		nodep->socbus = xlp9xx_get_socbus(node);
 	else
diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c
index d118b9a..72ceddc 100644
--- a/arch/mips/netlogic/xlr/setup.c
+++ b/arch/mips/netlogic/xlr/setup.c
@@ -168,7 +168,7 @@
 
 	nodep = nlm_current_node();
 	nodep->picbase = nlm_mmio_base(NETLOGIC_IO_PIC_OFFSET);
-	nodep->ebase = read_c0_ebase() & (~((1 << 12) - 1));
+	nodep->ebase = read_c0_ebase() & MIPS_EBASE_BASE;
 	spin_lock_init(&nodep->piclock);
 }
 
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index 3c9ec3d..2f33992 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -77,7 +77,7 @@
 	struct op_mips_model *lmodel = NULL;
 	int res;
 
-	switch (current_cpu_type()) {
+	switch (boot_cpu_type()) {
 	case CPU_5KC:
 	case CPU_M14KC:
 	case CPU_M14KEC:
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index 8f988a6..45cb274 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -269,11 +269,9 @@
 	return handled;
 }
 
-#define M_CONFIG1_PC	(1 << 4)
-
 static inline int __n_counters(void)
 {
-	if (!(read_c0_config1() & M_CONFIG1_PC))
+	if (!cpu_has_perf)
 		return 0;
 	if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
 		return 1;
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index c2ce41e..2b5427d 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
 #include <linux/of_irq.h>
diff --git a/arch/mips/pci/ops-lantiq.c b/arch/mips/pci/ops-lantiq.c
index e5738ee..f51e108 100644
--- a/arch/mips/pci/ops-lantiq.c
+++ b/arch/mips/pci/ops-lantiq.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #include <linux/types.h>
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index 2895263..c8994c1 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -76,7 +76,7 @@
 	unsigned long old_ctx;
 
 	/* Save old context and create impossible VPN2 value */
-	old_ctx = read_c0_entryhi() & 0xff;
+	old_ctx = read_c0_entryhi() & MIPS_ENTRYHI_ASID;
 	old_pagemask = read_c0_pagemask();
 	write_c0_index(entry);
 	write_c0_pagemask(pagemask);
diff --git a/arch/mips/pci/pci-ip32.c b/arch/mips/pci/pci-ip32.c
index b1e061f..7ae89d0 100644
--- a/arch/mips/pci/pci-ip32.c
+++ b/arch/mips/pci/pci-ip32.c
@@ -116,7 +116,6 @@
 	.pci_ops	= &mace_pci_ops,
 	.mem_resource	= &mace_pci_mem_resource,
 	.io_resource	= &mace_pci_io_resource,
-	.iommu		= 0,
 	.mem_offset	= MACE_PCI_MEM_OFFSET,
 	.io_offset	= 0,
 	.io_map_base	= CKSEG1ADDR(MACEPCI_LOW_IO),
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 6a15dbd..b9deab1 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #include <linux/types.h>
diff --git a/arch/mips/pci/pci-lantiq.h b/arch/mips/pci/pci-lantiq.h
index 66bf6cd..0cc7125 100644
--- a/arch/mips/pci/pci-lantiq.h
+++ b/arch/mips/pci/pci-lantiq.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 John Crispin <john@phrozen.org>
  */
 
 #ifndef _LTQ_PCI_H__
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 1ae932c..6ce8162 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -2,7 +2,7 @@
  *  Ralink MT7620A SoC PCI support
  *
  *  Copyright (C) 2007-2013 Bruce Chang (Mediatek)
- *  Copyright (C) 2013-2016 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2013-2016 John Crispin <john@phrozen.org>
  *
  *  This program is free software; you can redistribute it and/or modify it
  *  under the terms of the GNU General Public License version 2 as published
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index a245cad..f2a1050 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -1,7 +1,7 @@
 /*
  *  Ralink RT288x SoC PCI register definitions
  *
- *  Copyright (C) 2009 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2009 John Crispin <john@phrozen.org>
  *  Copyright (C) 2009 Gabor Juhos <juhosg@openwrt.org>
  *
  *  Parts of this file are based on Ralink's 2.6.21 BSP
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index b8a0bf5..f1b11f0 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -83,9 +83,6 @@
 	LIST_HEAD(resources);
 	struct pci_bus *bus;
 
-	if (!hose->iommu)
-		PCI_DMA_BUS_IS_PHYS = 1;
-
 	if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY))
 		next_busno = (*hose->get_busno)();
 
diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c
index ca6a62b..62a0a78 100644
--- a/arch/mips/pic32/pic32mzda/time.c
+++ b/arch/mips/pic32/pic32mzda/time.c
@@ -11,13 +11,12 @@
  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  *  for more details.
  */
-#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clocksource.h>
 #include <linux/init.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/irqdomain.h>
 
 #include <asm/time.h>
 
@@ -58,16 +57,12 @@
 
 void __init plat_time_init(void)
 {
-	struct clk *clk;
+	unsigned long rate = pic32_get_pbclk(7);
 
 	of_clk_init(NULL);
-	clk = clk_get_sys("cpu_clk", NULL);
-	if (IS_ERR(clk))
-		panic("unable to get CPU clock, err=%ld", PTR_ERR(clk));
 
-	clk_prepare_enable(clk);
-	pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
-	mips_hpt_frequency = clk_get_rate(clk) / 2;
+	pr_info("CPU Clock: %ldMHz\n", rate / 1000000);
+	mips_hpt_frequency = rate / 2;
 
 	clocksource_probe();
 }
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
index 96ba2cc..956c92e 100644
--- a/arch/mips/pistachio/init.c
+++ b/arch/mips/pistachio/init.c
@@ -2,6 +2,7 @@
  * Pistachio platform setup
  *
  * Copyright (C) 2014 Google, Inc.
+ * Copyright (C) 2016 Imagination Technologies
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -9,6 +10,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/of_address.h>
 #include <linux/of_fdt.h>
@@ -24,9 +26,38 @@
 #include <asm/smp-ops.h>
 #include <asm/traps.h>
 
+/*
+ * Core revision register decoding
+ * Bits 23 to 20: Major rev
+ * Bits 15 to 8: Minor rev
+ * Bits 7 to 0: Maintenance rev
+ */
+#define PISTACHIO_CORE_REV_REG	0xB81483D0
+#define PISTACHIO_CORE_REV_A1	0x00100006
+#define PISTACHIO_CORE_REV_B0	0x00100106
+
 const char *get_system_type(void)
 {
-	return "IMG Pistachio SoC";
+	u32 core_rev;
+	const char *sys_type;
+
+	core_rev = __raw_readl((const void *)PISTACHIO_CORE_REV_REG);
+
+	switch (core_rev) {
+	case PISTACHIO_CORE_REV_B0:
+		sys_type = "IMG Pistachio SoC (B0)";
+		break;
+
+	case PISTACHIO_CORE_REV_A1:
+		sys_type = "IMG Pistachio SoC (A1)";
+		break;
+
+	default:
+		sys_type = "IMG Pistachio SoC";
+		break;
+	}
+
+	return sys_type;
 }
 
 static void __init plat_setup_iocoherency(void)
@@ -109,6 +140,8 @@
 	mips_cm_probe();
 	mips_cpc_probe();
 	register_cps_smp_ops();
+
+	pr_info("SoC Type: %s\n", get_system_type());
 }
 
 void __init prom_free_prom_memory(void)
diff --git a/arch/mips/pmcs-msp71xx/msp_setup.c b/arch/mips/pmcs-msp71xx/msp_setup.c
index 9d293b3..a63b736 100644
--- a/arch/mips/pmcs-msp71xx/msp_setup.c
+++ b/arch/mips/pmcs-msp71xx/msp_setup.c
@@ -118,7 +118,7 @@
 	/* No chip-specific reset code, just jump to the ROM reset vector */
 	set_c0_status(ST0_BEV | ST0_ERL);
 	change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED);
-	flush_cache_all();
+	__flush_cache_all();
 	write_c0_wired(0);
 
 	__asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
diff --git a/arch/mips/pnx833x/common/setup.c b/arch/mips/pnx833x/common/setup.c
index 99b4d94..8a7443b 100644
--- a/arch/mips/pnx833x/common/setup.c
+++ b/arch/mips/pnx833x/common/setup.c
@@ -38,9 +38,6 @@
 
 int __init plat_mem_setup(void)
 {
-	/* fake pci bus to avoid bounce buffers */
-	PCI_DMA_BUS_IS_PHYS = 1;
-
 	/* set mips clock to 320MHz */
 #if defined(CONFIG_SOC_PNX8335)
 	PNX8335_WRITEFIELD(0x17, CLOCK_PLL_CPU_CTL, FREQ);
diff --git a/arch/mips/ralink/Makefile b/arch/mips/ralink/Makefile
index 0d1795a..fe34715 100644
--- a/arch/mips/ralink/Makefile
+++ b/arch/mips/ralink/Makefile
@@ -4,7 +4,7 @@
 # Makefile for the Ralink common stuff
 #
 # Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
-# Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+# Copyright (C) 2013 John Crispin <john@phrozen.org>
 
 obj-y := prom.o of.o reset.o
 
diff --git a/arch/mips/ralink/bootrom.c b/arch/mips/ralink/bootrom.c
index 5403468..e1fa597 100644
--- a/arch/mips/ralink/bootrom.c
+++ b/arch/mips/ralink/bootrom.c
@@ -3,7 +3,7 @@
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
  *
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/debugfs.h>
diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c
index e46f91f..3ad0b07 100644
--- a/arch/mips/ralink/cevt-rt3352.c
+++ b/arch/mips/ralink/cevt-rt3352.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2013 by John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 by John Crispin <john@phrozen.org>
  */
 
 #include <linux/clockchips.h>
diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c
index 25c4a61..ebaa7cc 100644
--- a/arch/mips/ralink/clk.c
+++ b/arch/mips/ralink/clk.c
@@ -4,7 +4,7 @@
  *  by the Free Software Foundation.
  *
  *  Copyright (C) 2011 Gabor Juhos <juhosg@openwrt.org>
- *  Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/kernel.h>
diff --git a/arch/mips/ralink/common.h b/arch/mips/ralink/common.h
index 8e7d8e6..b8245d0 100644
--- a/arch/mips/ralink/common.h
+++ b/arch/mips/ralink/common.h
@@ -3,7 +3,7 @@
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
  *
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #ifndef _RALINK_COMMON_H__
diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c
index e10d10b..765d5ba 100644
--- a/arch/mips/ralink/ill_acc.c
+++ b/arch/mips/ralink/ill_acc.c
@@ -3,7 +3,7 @@
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
  *
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/interrupt.h>
diff --git a/arch/mips/ralink/irq-gic.c b/arch/mips/ralink/irq-gic.c
index 50d6c55..2058280 100644
--- a/arch/mips/ralink/irq-gic.c
+++ b/arch/mips/ralink/irq-gic.c
@@ -4,7 +4,7 @@
  * by the Free Software Foundation.
  *
  * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
- * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2015 John Crispin <john@phrozen.org>
  */
 
 #include <linux/init.h>
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index 4cf77f3..4911c14 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -4,7 +4,7 @@
  * by the Free Software Foundation.
  *
  * Copyright (C) 2009 Gabor Juhos <juhosg@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/io.h>
diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index 0d3d1a9..88b82fe 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -7,7 +7,7 @@
  *
  * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/kernel.h>
@@ -581,11 +581,14 @@
 		(rev & CHIP_REV_ECO_MASK));
 
 	cfg0 = __raw_readl(sysc + SYSC_REG_SYSTEM_CONFIG0);
-	if (is_mt76x8())
+	if (is_mt76x8()) {
 		dram_type = cfg0 & DRAM_TYPE_MT7628_MASK;
-	else
+	} else {
 		dram_type = (cfg0 >> SYSCFG0_DRAM_TYPE_SHIFT) &
 			    SYSCFG0_DRAM_TYPE_MASK;
+		if (dram_type == SYSCFG0_DRAM_TYPE_UNKNOWN)
+			dram_type = SYSCFG0_DRAM_TYPE_SDRAM;
+	}
 
 	soc_info->mem_base = MT7620_DRAM_BASE;
 	if (is_mt76x8())
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
index e9b9fa3..a45bbbe 100644
--- a/arch/mips/ralink/mt7621.c
+++ b/arch/mips/ralink/mt7621.c
@@ -4,7 +4,7 @@
  * by the Free Software Foundation.
  *
  * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
- * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2015 John Crispin <john@phrozen.org>
  */
 
 #include <linux/kernel.h>
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index f9eda5d..0aa67a2 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
  * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/io.h>
diff --git a/arch/mips/ralink/prom.c b/arch/mips/ralink/prom.c
index 39a9142f..5a73c5e 100644
--- a/arch/mips/ralink/prom.c
+++ b/arch/mips/ralink/prom.c
@@ -5,7 +5,7 @@
  *
  *  Copyright (C) 2009 Gabor Juhos <juhosg@openwrt.org>
  *  Copyright (C) 2010 Joonas Lahtinen <joonas.lahtinen@gmail.com>
- *  Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/string.h>
diff --git a/arch/mips/ralink/reset.c b/arch/mips/ralink/reset.c
index ee117c4..64543d6 100644
--- a/arch/mips/ralink/reset.c
+++ b/arch/mips/ralink/reset.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/pm.h>
@@ -61,7 +61,7 @@
 	return ralink_deassert_device(rcdev, id);
 }
 
-static struct reset_control_ops reset_ops = {
+static const struct reset_control_ops reset_ops = {
 	.reset = ralink_reset_device,
 	.assert = ralink_assert_device,
 	.deassert = ralink_deassert_device,
diff --git a/arch/mips/ralink/rt288x.c b/arch/mips/ralink/rt288x.c
index 3c84166..285796e 100644
--- a/arch/mips/ralink/rt288x.c
+++ b/arch/mips/ralink/rt288x.c
@@ -7,7 +7,7 @@
  *
  * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/kernel.h>
diff --git a/arch/mips/ralink/rt305x.c b/arch/mips/ralink/rt305x.c
index d7c4ba4..c8a28c4b 100644
--- a/arch/mips/ralink/rt305x.c
+++ b/arch/mips/ralink/rt305x.c
@@ -7,7 +7,7 @@
  *
  * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/kernel.h>
diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c
index fafec94..4cef916 100644
--- a/arch/mips/ralink/rt3883.c
+++ b/arch/mips/ralink/rt3883.c
@@ -7,7 +7,7 @@
  *
  * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
  * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
  */
 
 #include <linux/kernel.h>
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
index 5b4f186..069771d 100644
--- a/arch/mips/ralink/timer-gic.c
+++ b/arch/mips/ralink/timer-gic.c
@@ -4,7 +4,7 @@
  * by the Free Software Foundation.
  *
  * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
- * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2015 John Crispin <john@phrozen.org>
  */
 
 #include <linux/init.h>
diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c
index 82c72a1..b0343ff 100644
--- a/arch/mips/ralink/timer.c
+++ b/arch/mips/ralink/timer.c
@@ -3,7 +3,7 @@
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
  *
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2013 John Crispin <john@phrozen.org>
 */
 
 #include <linux/module.h>
@@ -180,5 +180,5 @@
 module_platform_driver(rt_timer_driver);
 
 MODULE_DESCRIPTION("Ralink RT2880 timer");
-MODULE_AUTHOR("John Crispin <blogic@openwrt.org");
+MODULE_AUTHOR("John Crispin <john@phrozen.org>");
 MODULE_LICENSE("GPL");
diff --git a/arch/mips/sibyte/Kconfig b/arch/mips/sibyte/Kconfig
index cb9a095..707b884 100644
--- a/arch/mips/sibyte/Kconfig
+++ b/arch/mips/sibyte/Kconfig
@@ -143,7 +143,8 @@
 config SIBYTE_BUS_WATCHER
 	bool "Support for Bus Watcher statistics"
 	depends on SIBYTE_SB1xxx_SOC && \
-		(SIBYTE_BCM112X || SIBYTE_SB1250)
+		(SIBYTE_BCM112X || SIBYTE_SB1250 || \
+		 SIBYTE_BCM1x55 || SIBYTE_BCM1x80)
 	help
 	  Handle and keep statistics on the bus error interrupts (COR_ECC,
 	  BAD_ECC, IO_BUS).
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index ee3617c..b369509 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -50,13 +50,17 @@
       cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \
                    -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
 
+# Strip rule for the raw .so files
+$(obj)/%.so.raw: OBJCOPYFLAGS := -S
+$(obj)/%.so.raw: $(obj)/%.so.dbg.raw FORCE
+	$(call if_changed,objcopy)
+
 hostprogs-y := genvdso
 
 quiet_cmd_genvdso = GENVDSO $@
 define cmd_genvdso
-	cp $< $(<:%.dbg=%) && \
-	$(OBJCOPY) -S $< $(<:%.dbg=%) && \
-	$(obj)/genvdso $< $(<:%.dbg=%) $@ $(VDSO_NAME)
+	$(foreach file,$(filter %.raw,$^),cp $(file) $(file:%.raw=%) &&) \
+	$(obj)/genvdso $(<:%.raw=%) $(<:%.dbg.raw=%) $@ $(VDSO_NAME)
 endef
 
 #
@@ -66,7 +70,10 @@
 native-abi := $(filter -mabi=%,$(KBUILD_CFLAGS))
 
 targets += $(obj-vdso-y)
-targets += vdso.lds vdso.so.dbg vdso.so vdso-image.c
+targets += vdso.lds
+targets += vdso.so.dbg.raw vdso.so.raw
+targets += vdso.so.dbg vdso.so
+targets += vdso-image.c
 
 obj-vdso := $(obj-vdso-y:%.o=$(obj)/%.o)
 
@@ -75,10 +82,11 @@
 
 $(obj)/vdso.lds: KBUILD_CPPFLAGS := $(native-abi)
 
-$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
+$(obj)/vdso.so.dbg.raw: $(obj)/vdso.lds $(obj-vdso) FORCE
 	$(call if_changed,vdsold)
 
-$(obj)/vdso-image.c: $(obj)/vdso.so.dbg $(obj)/genvdso FORCE
+$(obj)/vdso-image.c: $(obj)/vdso.so.dbg.raw $(obj)/vdso.so.raw \
+                     $(obj)/genvdso FORCE
 	$(call if_changed,genvdso)
 
 obj-y += vdso-image.o
@@ -89,7 +97,10 @@
 
 # Define these outside the ifdef to ensure they are picked up by clean.
 targets += $(obj-vdso-y:%.o=%-o32.o)
-targets += vdso-o32.lds vdso-o32.so.dbg vdso-o32.so vdso-o32-image.c
+targets += vdso-o32.lds
+targets += vdso-o32.so.dbg.raw vdso-o32.so.raw
+targets += vdso-o32.so.dbg vdso-o32.so
+targets += vdso-o32-image.c
 
 ifdef CONFIG_MIPS32_O32
 
@@ -109,11 +120,12 @@
 $(obj)/vdso-o32.lds: $(src)/vdso.lds.S FORCE
 	$(call if_changed_dep,cpp_lds_S)
 
-$(obj)/vdso-o32.so.dbg: $(obj)/vdso-o32.lds $(obj-vdso-o32) FORCE
+$(obj)/vdso-o32.so.dbg.raw: $(obj)/vdso-o32.lds $(obj-vdso-o32) FORCE
 	$(call if_changed,vdsold)
 
 $(obj)/vdso-o32-image.c: VDSO_NAME := o32
-$(obj)/vdso-o32-image.c: $(obj)/vdso-o32.so.dbg $(obj)/genvdso FORCE
+$(obj)/vdso-o32-image.c: $(obj)/vdso-o32.so.dbg.raw $(obj)/vdso-o32.so.raw \
+                         $(obj)/genvdso FORCE
 	$(call if_changed,genvdso)
 
 obj-y += vdso-o32-image.o
@@ -125,7 +137,10 @@
 #
 
 targets += $(obj-vdso-y:%.o=%-n32.o)
-targets += vdso-n32.lds vdso-n32.so.dbg vdso-n32.so vdso-n32-image.c
+targets += vdso-n32.lds
+targets += vdso-n32.so.dbg.raw vdso-n32.so.raw
+targets += vdso-n32.so.dbg vdso-n32.so
+targets += vdso-n32-image.c
 
 ifdef CONFIG_MIPS32_N32
 
@@ -145,11 +160,12 @@
 $(obj)/vdso-n32.lds: $(src)/vdso.lds.S FORCE
 	$(call if_changed_dep,cpp_lds_S)
 
-$(obj)/vdso-n32.so.dbg: $(obj)/vdso-n32.lds $(obj-vdso-n32) FORCE
+$(obj)/vdso-n32.so.dbg.raw: $(obj)/vdso-n32.lds $(obj-vdso-n32) FORCE
 	$(call if_changed,vdsold)
 
 $(obj)/vdso-n32-image.c: VDSO_NAME := n32
-$(obj)/vdso-n32-image.c: $(obj)/vdso-n32.so.dbg $(obj)/genvdso FORCE
+$(obj)/vdso-n32-image.c: $(obj)/vdso-n32.so.dbg.raw $(obj)/vdso-n32.so.raw \
+                         $(obj)/genvdso FORCE
 	$(call if_changed,genvdso)
 
 obj-y += vdso-n32-image.o
diff --git a/arch/mips/vr41xx/common/pmu.c b/arch/mips/vr41xx/common/pmu.c
index d7f7558..39a0db3 100644
--- a/arch/mips/vr41xx/common/pmu.c
+++ b/arch/mips/vr41xx/common/pmu.c
@@ -73,7 +73,7 @@
 	default:
 		set_c0_status(ST0_BEV | ST0_ERL);
 		change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED);
-		flush_cache_all();
+		__flush_cache_all();
 		write_c0_wired(0);
 		__asm__("jr	%0"::"r"(0xbfc00000));
 		break;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d7b3431..ec35af3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -40,6 +40,9 @@
 #define KVM_MAX_VCORES		NR_CPUS
 #define KVM_USER_MEM_SLOTS	512
 
+#include <asm/cputhreads.h>
+#define KVM_MAX_VCPU_ID                (threads_per_subcore * KVM_MAX_VCORES)
+
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
@@ -113,6 +116,7 @@
 	u32 ext_intr_exits;
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 	u32 dbell_exits;
 	u32 gdbell_exits;
@@ -724,5 +728,6 @@
 static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b34220d..47018fc 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -54,6 +54,7 @@
 	{ "queue_intr",  VCPU_STAT(queue_intr) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
+	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "pf_storage",  VCPU_STAT(pf_storage) },
 	{ "sp_storage",  VCPU_STAT(sp_storage) },
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 84fb4fc..9324355 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -27,6 +27,7 @@
 #include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/spinlock.h>
 #include <linux/page-flags.h>
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 95bceca..8129b0d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -882,6 +882,24 @@
 }
 #endif
 
+static void kvmppc_setup_debug(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		u64 msr = kvmppc_get_msr(vcpu);
+
+		kvmppc_set_msr(vcpu, msr | MSR_SE);
+	}
+}
+
+static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		u64 msr = kvmppc_get_msr(vcpu);
+
+		kvmppc_set_msr(vcpu, msr & ~MSR_SE);
+	}
+}
+
 int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			  unsigned int exit_nr)
 {
@@ -1207,10 +1225,18 @@
 		break;
 #endif
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
-	case BOOK3S_INTERRUPT_TRACE:
 		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 		r = RESUME_GUEST;
 		break;
+	case BOOK3S_INTERRUPT_TRACE:
+		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+			run->exit_reason = KVM_EXIT_DEBUG;
+			r = RESUME_HOST;
+		} else {
+			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			r = RESUME_GUEST;
+		}
+		break;
 	default:
 	{
 		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
@@ -1479,6 +1505,8 @@
 		goto out;
 	}
 
+	kvmppc_setup_debug(vcpu);
+
 	/*
 	 * Interrupts could be timers for the guest which we have to inject
 	 * again, so let's postpone them until we're in the guest and if we
@@ -1501,6 +1529,8 @@
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
+	kvmppc_clear_debug(vcpu);
+
 	/* No need for kvm_guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 46871d5..a75ba38 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -92,7 +92,7 @@
 	 * we are the only setter, thus concurrent access is undefined
 	 * to begin with.
 	 */
-	if (level == 1 || level == KVM_INTERRUPT_SET_LEVEL)
+	if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
 		state->asserted = 1;
 	else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
 		state->asserted = 0;
@@ -280,7 +280,7 @@
 	if (!success)
 		goto bail;
 
-	XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+	XICS_DBG("UPD [%04lx] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
 		 icp->server_num,
 		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
 		 old.need_resend, old.out_ee);
@@ -336,7 +336,7 @@
 	union kvmppc_icp_state old_state, new_state;
 	bool success;
 
-	XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+	XICS_DBG("try deliver %#x(P:%#x) to server %#lx\n", irq, priority,
 		 icp->server_num);
 
 	do {
@@ -1174,9 +1174,11 @@
 			prio = irqp->saved_priority;
 		}
 		val |= prio << KVM_XICS_PRIORITY_SHIFT;
-		if (irqp->asserted)
-			val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
-		else if (irqp->masked_pending || irqp->resend)
+		if (irqp->lsi) {
+			val |= KVM_XICS_LEVEL_SENSITIVE;
+			if (irqp->asserted)
+				val |= KVM_XICS_PENDING;
+		} else if (irqp->masked_pending || irqp->resend)
 			val |= KVM_XICS_PENDING;
 		ret = 0;
 	}
@@ -1228,9 +1230,13 @@
 	irqp->priority = prio;
 	irqp->resend = 0;
 	irqp->masked_pending = 0;
+	irqp->lsi = 0;
 	irqp->asserted = 0;
-	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
-		irqp->asserted = 1;
+	if (val & KVM_XICS_LEVEL_SENSITIVE) {
+		irqp->lsi = 1;
+		if (val & KVM_XICS_PENDING)
+			irqp->asserted = 1;
+	}
 	irqp->exists = 1;
 	arch_spin_unlock(&ics->lock);
 	local_irq_restore(flags);
@@ -1249,11 +1255,10 @@
 	return ics_deliver_irq(xics, irq, level);
 }
 
-int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
-		int irq_source_id, int level, bool line_status)
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
+			      struct kvm *kvm, int irq_source_id,
+			      int level, bool line_status)
 {
-	if (!level)
-		return -1;
 	return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
 			   level, line_status);
 }
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 56ea44f..a46b954 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -39,6 +39,7 @@
 	u8  saved_priority;
 	u8  resend;
 	u8  masked_pending;
+	u8  lsi;		/* level-sensitive interrupt */
 	u8  asserted; /* Only for LSI */
 	u8  exists;
 };
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4d66f44..4afae69 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -64,6 +64,7 @@
 	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "doorbell", VCPU_STAT(dbell_exits) },
 	{ "guest doorbell", VCPU_STAT(gdbell_exits) },
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6a68730..02416fe 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -800,9 +800,9 @@
 	}
 }
 
-int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
-		       unsigned int rt, unsigned int bytes,
-		       int is_default_endian)
+static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				unsigned int rt, unsigned int bytes,
+				int is_default_endian, int sign_extend)
 {
 	int idx, ret;
 	bool host_swabbed;
@@ -827,7 +827,7 @@
 	vcpu->arch.mmio_host_swabbed = host_swabbed;
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_is_write = 0;
-	vcpu->arch.mmio_sign_extend = 0;
+	vcpu->arch.mmio_sign_extend = sign_extend;
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
@@ -844,6 +844,13 @@
 
 	return EMULATE_DO_MMIO;
 }
+
+int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		       unsigned int rt, unsigned int bytes,
+		       int is_default_endian)
+{
+	return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0);
+}
 EXPORT_SYMBOL_GPL(kvmppc_handle_load);
 
 /* Same as above, but sign extends */
@@ -851,12 +858,7 @@
 			unsigned int rt, unsigned int bytes,
 			int is_default_endian)
 {
-	int r;
-
-	vcpu->arch.mmio_sign_extend = 1;
-	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian);
-
-	return r;
+	return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1);
 }
 
 int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6da41fa..37b9017 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -38,7 +38,7 @@
  */
 #define KVM_NR_IRQCHIPS 1
 #define KVM_IRQCHIP_NUM_PINS 4096
-#define KVM_HALT_POLL_NS_DEFAULT 0
+#define KVM_HALT_POLL_NS_DEFAULT 80000
 
 /* s390-specific vcpu->requests bit members */
 #define KVM_REQ_ENABLE_IBS         8
@@ -247,6 +247,7 @@
 	u32 exit_instruction;
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 	u32 instruction_lctl;
 	u32 instruction_lctlg;
@@ -544,10 +545,6 @@
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
-	union  {
-		struct cpuid	cpu_id;
-		u64		stidp_data;
-	};
 	struct gmap *gmap;
 	struct kvm_guestdbg_info_arch guestdbg;
 	unsigned long pfault_token;
@@ -605,7 +602,7 @@
 	__u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
 	/* facility list requested by guest (in dma page) */
 	__u64 *fac_list;
-	struct cpuid cpu_id;
+	u64 cpuid;
 	unsigned short ibc;
 };
 
@@ -700,4 +697,6 @@
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
+void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index bd7893d..e4f6f73 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -69,6 +69,7 @@
 	unsigned int max_cores;
 	unsigned long hsa_size;
 	unsigned long facilities;
+	unsigned int hmfai;
 };
 extern struct sclp_info sclp;
 
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index ec60cf7..1c8f33f 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -27,6 +27,7 @@
 
 /* SIGP cpu status bits */
 
+#define SIGP_STATUS_INVALID_ORDER	0x00000002UL
 #define SIGP_STATUS_CHECK_STOP		0x00000010UL
 #define SIGP_STATUS_STOPPED		0x00000040UL
 #define SIGP_STATUS_EXT_CALL_PENDING	0x00000080UL
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 347fe5a..3b8e99e 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
 #define KVM_DEV_FLIC_APF_DISABLE_WAIT	5
 #define KVM_DEV_FLIC_ADAPTER_REGISTER	6
 #define KVM_DEV_FLIC_ADAPTER_MODIFY	7
+#define KVM_DEV_FLIC_CLEAR_IO_IRQ	8
 /*
  * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
  * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 5dbaa72..8fb5d4a 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -16,14 +16,19 @@
 	{ 0x01, "SIGP sense" },					\
 	{ 0x02, "SIGP external call" },				\
 	{ 0x03, "SIGP emergency signal" },			\
+	{ 0x04, "SIGP start" },					\
 	{ 0x05, "SIGP stop" },					\
 	{ 0x06, "SIGP restart" },				\
 	{ 0x09, "SIGP stop and store status" },			\
 	{ 0x0b, "SIGP initial cpu reset" },			\
+	{ 0x0c, "SIGP cpu reset" },				\
 	{ 0x0d, "SIGP set prefix" },				\
 	{ 0x0e, "SIGP store status at address" },		\
 	{ 0x12, "SIGP set architecture" },			\
-	{ 0x15, "SIGP sense running" }
+	{ 0x13, "SIGP conditional emergency signal" },		\
+	{ 0x15, "SIGP sense running" },				\
+	{ 0x16, "SIGP set multithreading"},			\
+	{ 0x17, "SIGP store additional status at address"}
 
 #define icpt_prog_codes						\
 	{ 0x0001, "Prog Operation" },				\
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5ea5af3..b190023 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -28,6 +28,7 @@
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_INVALID_WAKEUPS
 	select SRCU
 	select KVM_VFIO
 	---help---
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 84efc2b..5a80af7 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -977,6 +977,11 @@
 
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * We cannot move this into the if, as the CPU might be already
+	 * in kvm_vcpu_block without having the waitqueue set (polling)
+	 */
+	vcpu->valid_wakeup = true;
 	if (swait_active(&vcpu->wq)) {
 		/*
 		 * The vcpu gave up the cpu voluntarily, mark it as a good
@@ -2034,6 +2039,27 @@
 	return ret;
 }
 
+static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
+
+{
+	const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+	u32 schid;
+
+	if (attr->flags)
+		return -EINVAL;
+	if (attr->attr != sizeof(schid))
+		return -EINVAL;
+	if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
+		return -EFAULT;
+	kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
+	/*
+	 * If userspace is conforming to the architecture, we can have at most
+	 * one pending I/O interrupt per subchannel, so this is effectively a
+	 * clear all.
+	 */
+	return 0;
+}
+
 static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	int r = 0;
@@ -2067,6 +2093,9 @@
 	case KVM_DEV_FLIC_ADAPTER_MODIFY:
 		r = modify_io_adapter(dev, attr);
 		break;
+	case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+		r = clear_io_irq(dev->kvm, attr);
+		break;
 	default:
 		r = -EINVAL;
 	}
@@ -2074,6 +2103,23 @@
 	return r;
 }
 
+static int flic_has_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_FLIC_GET_ALL_IRQS:
+	case KVM_DEV_FLIC_ENQUEUE:
+	case KVM_DEV_FLIC_CLEAR_IRQS:
+	case KVM_DEV_FLIC_APF_ENABLE:
+	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+	case KVM_DEV_FLIC_ADAPTER_REGISTER:
+	case KVM_DEV_FLIC_ADAPTER_MODIFY:
+	case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+		return 0;
+	}
+	return -ENXIO;
+}
+
 static int flic_create(struct kvm_device *dev, u32 type)
 {
 	if (!dev)
@@ -2095,6 +2141,7 @@
 	.name = "kvm-flic",
 	.get_attr = flic_get_attr,
 	.set_attr = flic_set_attr,
+	.has_attr = flic_has_attr,
 	.create = flic_create,
 	.destroy = flic_destroy,
 };
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 668c087..6d8ec3a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -65,6 +65,7 @@
 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
@@ -118,9 +119,9 @@
 };
 
 /* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[] = {
-	0xffe6fffbfcfdfc40UL,
-	0x005e800000000000UL,
+unsigned long kvm_s390_fac_list_mask[16] = {
+	0xffe6000000000000UL,
+	0x005e000000000000UL,
 };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
@@ -638,6 +639,7 @@
 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	struct kvm_s390_vm_cpu_processor *proc;
+	u16 lowest_ibc, unblocked_ibc;
 	int ret = 0;
 
 	mutex_lock(&kvm->lock);
@@ -652,9 +654,17 @@
 	}
 	if (!copy_from_user(proc, (void __user *)attr->addr,
 			    sizeof(*proc))) {
-		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
-		       sizeof(struct cpuid));
-		kvm->arch.model.ibc = proc->ibc;
+		kvm->arch.model.cpuid = proc->cpuid;
+		lowest_ibc = sclp.ibc >> 16 & 0xfff;
+		unblocked_ibc = sclp.ibc & 0xfff;
+		if (lowest_ibc) {
+			if (proc->ibc > unblocked_ibc)
+				kvm->arch.model.ibc = unblocked_ibc;
+			else if (proc->ibc < lowest_ibc)
+				kvm->arch.model.ibc = lowest_ibc;
+			else
+				kvm->arch.model.ibc = proc->ibc;
+		}
 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
 	} else
@@ -687,7 +697,7 @@
 		ret = -ENOMEM;
 		goto out;
 	}
-	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
+	proc->cpuid = kvm->arch.model.cpuid;
 	proc->ibc = kvm->arch.model.ibc;
 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
@@ -1081,10 +1091,13 @@
 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
 }
 
-static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
+static u64 kvm_s390_get_initial_cpuid(void)
 {
-	get_cpu_id(cpu_id);
-	cpu_id->version = 0xff;
+	struct cpuid cpuid;
+
+	get_cpu_id(&cpuid);
+	cpuid.version = 0xff;
+	return *((u64 *) &cpuid);
 }
 
 static void kvm_s390_crypto_init(struct kvm *kvm)
@@ -1175,7 +1188,7 @@
 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
 
-	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
+	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
 
 	kvm_s390_crypto_init(kvm);
@@ -1624,7 +1637,6 @@
 {
 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
 
-	vcpu->arch.cpu_id = model->cpu_id;
 	vcpu->arch.sie_block->ibc = model->ibc;
 	if (test_kvm_facility(vcpu->kvm, 7))
 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
@@ -1645,11 +1657,14 @@
 
 	kvm_s390_vcpu_setup_model(vcpu);
 
-	vcpu->arch.sie_block->ecb   = 6;
+	vcpu->arch.sie_block->ecb = 0x02;
+	if (test_kvm_facility(vcpu->kvm, 9))
+		vcpu->arch.sie_block->ecb |= 0x04;
 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= 0x10;
 
-	vcpu->arch.sie_block->ecb2  = 8;
+	if (test_kvm_facility(vcpu->kvm, 8))
+		vcpu->arch.sie_block->ecb2 |= 0x08;
 	vcpu->arch.sie_block->eca   = 0xC1002000U;
 	if (sclp.has_siif)
 		vcpu->arch.sie_block->eca |= 1;
@@ -2971,13 +2986,31 @@
 	return;
 }
 
+static inline unsigned long nonhyp_mask(int i)
+{
+	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
+
+	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
+}
+
+void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
+{
+	vcpu->valid_wakeup = false;
+}
+
 static int __init kvm_s390_init(void)
 {
+	int i;
+
 	if (!sclp.has_sief2) {
 		pr_info("SIE not available\n");
 		return -ENODEV;
 	}
 
+	for (i = 0; i < 16; i++)
+		kvm_s390_fac_list_mask[i] |=
+			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
+
 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 }
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0a1591d..95916fa 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -439,7 +439,7 @@
 
 static int handle_stidp(struct kvm_vcpu *vcpu)
 {
-	u64 stidp_data = vcpu->arch.stidp_data;
+	u64 stidp_data = vcpu->kvm->arch.model.cpuid;
 	u64 operand2;
 	int rc;
 	ar_t ar;
@@ -670,8 +670,9 @@
 	if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	/* Only provide non-quiescing support if the host supports it */
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
+	/* Only provide non-quiescing support if enabled for the guest */
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
+	    !test_kvm_facility(vcpu->kvm, 14))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* No support for conditional-SSKE */
@@ -744,7 +745,7 @@
 {
 	/* entries expected to be 1FF */
 	int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
-	unsigned long *cbrlo, cbrle;
+	unsigned long *cbrlo;
 	struct gmap *gmap;
 	int i;
 
@@ -765,17 +766,9 @@
 	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
 	cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
 	down_read(&gmap->mm->mmap_sem);
-	for (i = 0; i < entries; ++i) {
-		cbrle = cbrlo[i];
-		if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
-			/* invalid entry */
-			break;
-		/* try to free backing */
-		__gmap_zap(gmap, cbrle);
-	}
+	for (i = 0; i < entries; ++i)
+		__gmap_zap(gmap, cbrlo[i]);
 	up_read(&gmap->mm->mmap_sem);
-	if (i < entries)
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	return 0;
 }
 
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 77c22d6..28ea0ca 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -240,6 +240,12 @@
 	struct kvm_s390_local_interrupt *li;
 	int rc;
 
+	if (!test_kvm_facility(vcpu->kvm, 9)) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_ORDER;
+		return SIGP_CC_STATUS_STORED;
+	}
+
 	li = &dst_vcpu->arch.local_int;
 	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
 		/* running */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b7e3944..e0fbe7e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -562,7 +562,6 @@
 	struct {
 		u64 msr_val;
 		u64 last_steal;
-		u64 accum_steal;
 		struct gfn_to_hva_cache stime;
 		struct kvm_steal_time steal;
 	} st;
@@ -774,6 +773,11 @@
 	u8 nr_reserved_ioapic_pins;
 
 	bool disabled_lapic_found;
+
+	/* Struct members for AVIC */
+	u32 ldr_mode;
+	struct page *avic_logical_id_table_page;
+	struct page *avic_physical_id_table_page;
 };
 
 struct kvm_vm_stat {
@@ -804,6 +808,7 @@
 	u32 halt_exits;
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
 	u32 irq_exits;
@@ -848,6 +853,9 @@
 	bool (*cpu_has_high_real_mode_segbase)(void);
 	void (*cpuid_update)(struct kvm_vcpu *vcpu);
 
+	int (*vm_init)(struct kvm *kvm);
+	void (*vm_destroy)(struct kvm *kvm);
+
 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
@@ -914,7 +922,7 @@
 	bool (*get_enable_apicv)(void);
 	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
-	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
+	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
@@ -990,8 +998,13 @@
 	 */
 	int (*pre_block)(struct kvm_vcpu *vcpu);
 	void (*post_block)(struct kvm_vcpu *vcpu);
+
+	void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
+	void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
+
 	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
 			      uint32_t guest_irq, bool set);
+	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1341,7 +1354,18 @@
 void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 		     struct kvm_lapic_irq *irq);
 
-static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+	if (kvm_x86_ops->vcpu_blocking)
+		kvm_x86_ops->vcpu_blocking(vcpu);
+}
+
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+	if (kvm_x86_ops->vcpu_unblocking)
+		kvm_x86_ops->vcpu_unblocking(vcpu);
+}
+
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 6136d99..d0fe23e 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -78,7 +78,8 @@
 	u32 exit_int_info;
 	u32 exit_int_info_err;
 	u64 nested_ctl;
-	u8 reserved_4[16];
+	u64 avic_vapic_bar;
+	u8 reserved_4[8];
 	u32 event_inj;
 	u32 event_inj_err;
 	u64 nested_cr3;
@@ -88,7 +89,11 @@
 	u64 next_rip;
 	u8 insn_len;
 	u8 insn_bytes[15];
-	u8 reserved_6[800];
+	u64 avic_backing_page;	/* Offset 0xe0 */
+	u8 reserved_6[8];	/* Offset 0xe8 */
+	u64 avic_logical_id;	/* Offset 0xf0 */
+	u64 avic_physical_id;	/* Offset 0xf8 */
+	u8 reserved_7[768];
 };
 
 
@@ -111,6 +116,9 @@
 #define V_INTR_MASKING_SHIFT 24
 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
 
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1U << AVIC_ENABLE_SHIFT) /* 1U: bit 31 would hit int's sign bit */
+
 #define SVM_INTERRUPT_SHADOW_MASK 1
 
 #define SVM_IOIO_STR_SHIFT 2
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index cd54147..739c0c5 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -216,9 +216,9 @@
 	__u32 padding[3];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		(1 << 0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		(1 << 1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		(1 << 2)
 
 /* for KVM_SET_CPUID2 */
 struct kvm_cpuid2 {
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 8a4add8..b9e9bb2 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -73,6 +73,8 @@
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_NPF           0x400
+#define SVM_EXIT_AVIC_INCOMPLETE_IPI		0x401
+#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS	0x402
 
 #define SVM_EXIT_ERR           -1
 
@@ -107,8 +109,10 @@
 	{ SVM_EXIT_SMI,         "smi" }, \
 	{ SVM_EXIT_INIT,        "init" }, \
 	{ SVM_EXIT_VINTR,       "vintr" }, \
+	{ SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
 	{ SVM_EXIT_CPUID,       "cpuid" }, \
 	{ SVM_EXIT_INVD,        "invd" }, \
+	{ SVM_EXIT_PAUSE,       "pause" }, \
 	{ SVM_EXIT_HLT,         "hlt" }, \
 	{ SVM_EXIT_INVLPG,      "invlpg" }, \
 	{ SVM_EXIT_INVLPGA,     "invlpga" }, \
@@ -127,7 +131,10 @@
 	{ SVM_EXIT_MONITOR,     "monitor" }, \
 	{ SVM_EXIT_MWAIT,       "mwait" }, \
 	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
-	{ SVM_EXIT_NPF,         "npf" }
+	{ SVM_EXIT_NPF,         "npf" }, \
+	{ SVM_EXIT_RSM,         "rsm" }, \
+	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
+	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }
 
 
 #endif /* _UAPI__SVM_H */
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 2af478e..f2356bd 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -19,8 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/efi.h>
-#include <linux/verify_pefile.h>
-#include <keys/system_keyring.h>
+#include <linux/verification.h>
 
 #include <asm/bootparam.h>
 #include <asm/setup.h>
@@ -529,18 +528,9 @@
 #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
 static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
 {
-	bool trusted;
-	int ret;
-
-	ret = verify_pefile_signature(kernel, kernel_len,
-				      system_trusted_keyring,
-				      VERIFYING_KEXEC_PE_SIGNATURE,
-				      &trusted);
-	if (ret < 0)
-		return ret;
-	if (!trusted)
-		return -EKEYREJECTED;
-	return 0;
+	return verify_pefile_signature(kernel, kernel_len,
+				       NULL,
+				       VERIFYING_KEXEC_PE_SIGNATURE);
 }
 #endif
 
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 9db47090..5f42d03 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -443,7 +443,7 @@
 		spin_lock(&ioapic->lock);
 
 		if (trigger_mode != IOAPIC_LEVEL_TRIG ||
-		    kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
+		    kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
 			continue;
 
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 54ead79..dfb4c64 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -382,9 +382,6 @@
 	u32 i, nr_ioapic_pins;
 	int idx;
 
-	/* kvm->irq_routing must be read after clearing
-	 * KVM_SCAN_IOAPIC. */
-	smp_mb();
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1a2da0e..bbb5b28 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -59,9 +59,8 @@
 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
 #define apic_debug(fmt, arg...)
 
-#define APIC_LVT_NUM			6
 /* 14 is the version for Xeon and Pentium 8.4.8*/
-#define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
+#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
 #define LAPIC_MMIO_LENGTH		(1 << 12)
 /* followed define is not in apicdef.h */
 #define APIC_SHORT_MASK			0xc0000
@@ -73,14 +72,6 @@
 #define APIC_BROADCAST			0xFF
 #define X2APIC_BROADCAST		0xFFFFFFFFul
 
-#define VEC_POS(v) ((v) & (32 - 1))
-#define REG_POS(v) (((v) >> 5) << 4)
-
-static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
-{
-	*((u32 *) (apic->regs + reg_off)) = val;
-}
-
 static inline int apic_test_vector(int vec, void *bitmap)
 {
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -94,11 +85,6 @@
 		apic_test_vector(vector, apic->regs + APIC_IRR);
 }
 
-static inline void apic_set_vector(int vec, void *bitmap)
-{
-	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
-}
-
 static inline void apic_clear_vector(int vec, void *bitmap)
 {
 	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -173,7 +159,7 @@
 			continue;
 
 		aid = kvm_apic_id(apic);
-		ldr = kvm_apic_get_reg(apic, APIC_LDR);
+		ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 
 		if (aid < ARRAY_SIZE(new->phys_map))
 			new->phys_map[aid] = apic;
@@ -182,7 +168,7 @@
 			new->mode |= KVM_APIC_MODE_X2APIC;
 		} else if (ldr) {
 			ldr = GET_APIC_LOGICAL_ID(ldr);
-			if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+			if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 			else
 				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
@@ -212,7 +198,7 @@
 {
 	bool enabled = val & APIC_SPIV_APIC_ENABLED;
 
-	apic_set_reg(apic, APIC_SPIV, val);
+	kvm_lapic_set_reg(apic, APIC_SPIV, val);
 
 	if (enabled != apic->sw_enabled) {
 		apic->sw_enabled = enabled;
@@ -226,13 +212,13 @@
 
 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
 {
-	apic_set_reg(apic, APIC_ID, id << 24);
+	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 	recalculate_apic_map(apic->vcpu->kvm);
 }
 
 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 {
-	apic_set_reg(apic, APIC_LDR, id);
+	kvm_lapic_set_reg(apic, APIC_LDR, id);
 	recalculate_apic_map(apic->vcpu->kvm);
 }
 
@@ -240,19 +226,19 @@
 {
 	u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
 
-	apic_set_reg(apic, APIC_ID, id << 24);
-	apic_set_reg(apic, APIC_LDR, ldr);
+	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
+	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 	recalculate_apic_map(apic->vcpu->kvm);
 }
 
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
-	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
+	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 }
 
 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 {
-	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
+	return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 }
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
@@ -287,10 +273,10 @@
 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
 	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
 		v |= APIC_LVR_DIRECTED_EOI;
-	apic_set_reg(apic, APIC_LVR, v);
+	kvm_lapic_set_reg(apic, APIC_LVR, v);
 }
 
-static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
@@ -349,16 +335,6 @@
 }
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
-static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
-{
-	apic_set_vector(vec, apic->regs + APIC_IRR);
-	/*
-	 * irr_pending must be true if any interrupt is pending; set it after
-	 * APIC_IRR to avoid race with apic_clear_irr
-	 */
-	apic->irr_pending = true;
-}
-
 static inline int apic_search_irr(struct kvm_lapic *apic)
 {
 	return find_highest_vector(apic->regs + APIC_IRR);
@@ -416,7 +392,7 @@
 	 * just set SVI.
 	 */
 	if (unlikely(vcpu->arch.apicv_active))
-		kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
+		kvm_x86_ops->hwapic_isr_update(vcpu, vec);
 	else {
 		++apic->isr_count;
 		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -464,7 +440,7 @@
 	 * and must be left alone.
 	 */
 	if (unlikely(vcpu->arch.apicv_active))
-		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+		kvm_x86_ops->hwapic_isr_update(vcpu,
 					       apic_find_highest_isr(apic));
 	else {
 		--apic->isr_count;
@@ -549,8 +525,8 @@
 	u32 tpr, isrv, ppr, old_ppr;
 	int isr;
 
-	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
-	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
+	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
+	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 	isr = apic_find_highest_isr(apic);
 	isrv = (isr != -1) ? isr : 0;
 
@@ -563,7 +539,7 @@
 		   apic, ppr, isr, isrv);
 
 	if (old_ppr != ppr) {
-		apic_set_reg(apic, APIC_PROCPRI, ppr);
+		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 		if (ppr < old_ppr)
 			kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 	}
@@ -571,7 +547,7 @@
 
 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 {
-	apic_set_reg(apic, APIC_TASKPRI, tpr);
+	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 	apic_update_ppr(apic);
 }
 
@@ -601,7 +577,7 @@
 	if (kvm_apic_broadcast(apic, mda))
 		return true;
 
-	logical_id = kvm_apic_get_reg(apic, APIC_LDR);
+	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 
 	if (apic_x2apic_mode(apic))
 		return ((logical_id >> 16) == (mda >> 16))
@@ -610,7 +586,7 @@
 	logical_id = GET_APIC_LOGICAL_ID(logical_id);
 	mda = GET_APIC_DEST_FIELD(mda);
 
-	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
+	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 	case APIC_DFR_FLAT:
 		return (logical_id & mda) != 0;
 	case APIC_DFR_CLUSTER:
@@ -618,7 +594,7 @@
 		       && (logical_id & mda & 0xf) != 0;
 	default:
 		apic_debug("Bad DFR vcpu %d: %08x\n",
-			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
+			   apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
 		return false;
 	}
 }
@@ -668,6 +644,7 @@
 		return false;
 	}
 }
+EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 
 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 		       const unsigned long *bitmap, u32 bitmap_size)
@@ -921,7 +898,7 @@
 
 		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
 			if (trig_mode)
-				apic_set_vector(vector, apic->regs + APIC_TMR);
+				kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
 			else
 				apic_clear_vector(vector, apic->regs + APIC_TMR);
 		}
@@ -929,7 +906,7 @@
 		if (vcpu->arch.apicv_active)
 			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
 		else {
-			apic_set_irr(vector, apic);
+			kvm_lapic_set_irr(vector, apic);
 
 			kvm_make_request(KVM_REQ_EVENT, vcpu);
 			kvm_vcpu_kick(vcpu);
@@ -1073,8 +1050,8 @@
 
 static void apic_send_ipi(struct kvm_lapic *apic)
 {
-	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
-	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
+	u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
+	u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
 	struct kvm_lapic_irq irq;
 
 	irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -1111,7 +1088,7 @@
 	ASSERT(apic != NULL);
 
 	/* if initial count is 0, current count should also be 0 */
-	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 ||
+	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
 		apic->lapic_timer.period == 0)
 		return 0;
 
@@ -1168,13 +1145,13 @@
 		break;
 	case APIC_PROCPRI:
 		apic_update_ppr(apic);
-		val = kvm_apic_get_reg(apic, offset);
+		val = kvm_lapic_get_reg(apic, offset);
 		break;
 	case APIC_TASKPRI:
 		report_tpr_access(apic, false);
 		/* fall thru */
 	default:
-		val = kvm_apic_get_reg(apic, offset);
+		val = kvm_lapic_get_reg(apic, offset);
 		break;
 	}
 
@@ -1186,7 +1163,7 @@
 	return container_of(dev, struct kvm_lapic, dev);
 }
 
-static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 		void *data)
 {
 	unsigned char alignment = offset & 0xf;
@@ -1223,6 +1200,7 @@
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
 
 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
 {
@@ -1240,7 +1218,7 @@
 	if (!apic_mmio_in_range(apic, address))
 		return -EOPNOTSUPP;
 
-	apic_reg_read(apic, offset, len, data);
+	kvm_lapic_reg_read(apic, offset, len, data);
 
 	return 0;
 }
@@ -1249,7 +1227,7 @@
 {
 	u32 tmp1, tmp2, tdcr;
 
-	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
+	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
 	tmp1 = tdcr & 0xf;
 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
 	apic->divide_count = 0x1 << (tmp2 & 0x7);
@@ -1260,7 +1238,7 @@
 
 static void apic_update_lvtt(struct kvm_lapic *apic)
 {
-	u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) &
+	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
 			apic->lapic_timer.timer_mode_mask;
 
 	if (apic->lapic_timer.timer_mode != timer_mode) {
@@ -1296,7 +1274,7 @@
 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	u32 reg = kvm_apic_get_reg(apic, APIC_LVTT);
+	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
 
 	if (kvm_apic_hw_enabled(apic)) {
 		int vec = reg & APIC_VECTOR_MASK;
@@ -1344,7 +1322,7 @@
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
 		/* lapic timer in oneshot or periodic mode */
 		now = apic->lapic_timer.timer.base->get_time();
-		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
+		apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
 			    * APIC_BUS_CYCLE_NS * apic->divide_count;
 
 		if (!apic->lapic_timer.period)
@@ -1376,7 +1354,7 @@
 			   "timer initial count 0x%x, period %lldns, "
 			   "expire @ 0x%016" PRIx64 ".\n", __func__,
 			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-			   kvm_apic_get_reg(apic, APIC_TMICT),
+			   kvm_lapic_get_reg(apic, APIC_TMICT),
 			   apic->lapic_timer.period,
 			   ktime_to_ns(ktime_add_ns(now,
 					apic->lapic_timer.period)));
@@ -1425,7 +1403,7 @@
 	}
 }
 
-static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
+int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
 	int ret = 0;
 
@@ -1457,7 +1435,7 @@
 
 	case APIC_DFR:
 		if (!apic_x2apic_mode(apic)) {
-			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+			kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
 			recalculate_apic_map(apic->vcpu->kvm);
 		} else
 			ret = 1;
@@ -1465,17 +1443,17 @@
 
 	case APIC_SPIV: {
 		u32 mask = 0x3ff;
-		if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
+		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
 			mask |= APIC_SPIV_DIRECTED_EOI;
 		apic_set_spiv(apic, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
 			int i;
 			u32 lvt_val;
 
-			for (i = 0; i < APIC_LVT_NUM; i++) {
-				lvt_val = kvm_apic_get_reg(apic,
+			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
+				lvt_val = kvm_lapic_get_reg(apic,
 						       APIC_LVTT + 0x10 * i);
-				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
+				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
 					     lvt_val | APIC_LVT_MASKED);
 			}
 			apic_update_lvtt(apic);
@@ -1486,14 +1464,14 @@
 	}
 	case APIC_ICR:
 		/* No delay here, so we always clear the pending bit */
-		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
+		kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
 		apic_send_ipi(apic);
 		break;
 
 	case APIC_ICR2:
 		if (!apic_x2apic_mode(apic))
 			val &= 0xff000000;
-		apic_set_reg(apic, APIC_ICR2, val);
+		kvm_lapic_set_reg(apic, APIC_ICR2, val);
 		break;
 
 	case APIC_LVT0:
@@ -1507,7 +1485,7 @@
 			val |= APIC_LVT_MASKED;
 
 		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
-		apic_set_reg(apic, reg, val);
+		kvm_lapic_set_reg(apic, reg, val);
 
 		break;
 
@@ -1515,7 +1493,7 @@
 		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
-		apic_set_reg(apic, APIC_LVTT, val);
+		kvm_lapic_set_reg(apic, APIC_LVTT, val);
 		apic_update_lvtt(apic);
 		break;
 
@@ -1524,14 +1502,14 @@
 			break;
 
 		hrtimer_cancel(&apic->lapic_timer.timer);
-		apic_set_reg(apic, APIC_TMICT, val);
+		kvm_lapic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
 		break;
 
 	case APIC_TDCR:
 		if (val & 4)
 			apic_debug("KVM_WRITE:TDCR %x\n", val);
-		apic_set_reg(apic, APIC_TDCR, val);
+		kvm_lapic_set_reg(apic, APIC_TDCR, val);
 		update_divide_count(apic);
 		break;
 
@@ -1544,7 +1522,7 @@
 
 	case APIC_SELF_IPI:
 		if (apic_x2apic_mode(apic)) {
-			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
+			kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
 		} else
 			ret = 1;
 		break;
@@ -1556,6 +1534,7 @@
 		apic_debug("Local APIC Write to read-only register %x\n", reg);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
 
 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 			    gpa_t address, int len, const void *data)
@@ -1585,14 +1564,14 @@
 		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
 			   "0x%x\n", __func__, offset, len, val);
 
-	apic_reg_write(apic, offset & 0xff0, val);
+	kvm_lapic_reg_write(apic, offset & 0xff0, val);
 
 	return 0;
 }
 
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 {
-	apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
+	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 
@@ -1604,10 +1583,10 @@
 	/* hw has done the conditional check and inst decode */
 	offset &= 0xff0;
 
-	apic_reg_read(vcpu->arch.apic, offset, 4, &val);
+	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
 
 	/* TODO: optimize to just emulate side effect w/o one more write */
-	apic_reg_write(vcpu->arch.apic, offset, val);
+	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
 }
 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
 
@@ -1667,14 +1646,14 @@
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
-		     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
+		     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
 	u64 tpr;
 
-	tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
+	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
 
 	return (tpr & 0xf0) >> 4;
 }
@@ -1740,28 +1719,28 @@
 		kvm_apic_set_id(apic, vcpu->vcpu_id);
 	kvm_apic_set_version(apic->vcpu);
 
-	for (i = 0; i < APIC_LVT_NUM; i++)
-		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
+		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 	apic_update_lvtt(apic);
 	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
-		apic_set_reg(apic, APIC_LVT0,
+		kvm_lapic_set_reg(apic, APIC_LVT0,
 			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
-	apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
+	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
 
-	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
+	kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
 	apic_set_spiv(apic, 0xff);
-	apic_set_reg(apic, APIC_TASKPRI, 0);
+	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
 	if (!apic_x2apic_mode(apic))
 		kvm_apic_set_ldr(apic, 0);
-	apic_set_reg(apic, APIC_ESR, 0);
-	apic_set_reg(apic, APIC_ICR, 0);
-	apic_set_reg(apic, APIC_ICR2, 0);
-	apic_set_reg(apic, APIC_TDCR, 0);
-	apic_set_reg(apic, APIC_TMICT, 0);
+	kvm_lapic_set_reg(apic, APIC_ESR, 0);
+	kvm_lapic_set_reg(apic, APIC_ICR, 0);
+	kvm_lapic_set_reg(apic, APIC_ICR2, 0);
+	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
+	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
 	for (i = 0; i < 8; i++) {
-		apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
-		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
-		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
+		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
+		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
+		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
 	apic->irr_pending = vcpu->arch.apicv_active;
 	apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
@@ -1806,7 +1785,7 @@
 
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
-	u32 reg = kvm_apic_get_reg(apic, lvt_type);
+	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
 	int vector, mode, trig_mode;
 
 	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
@@ -1901,14 +1880,14 @@
 	apic_update_ppr(apic);
 	highest_irr = apic_find_highest_irr(apic);
 	if ((highest_irr == -1) ||
-	    ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
+	    ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI)))
 		return -1;
 	return highest_irr;
 }
 
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
-	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
+	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;
 
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
@@ -1974,7 +1953,7 @@
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	apic_update_lvtt(apic);
-	apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
+	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
 	update_divide_count(apic);
 	start_apic_timer(apic);
 	apic->irr_pending = true;
@@ -1982,9 +1961,11 @@
 				1 : count_vectors(apic->regs + APIC_ISR);
 	apic->highest_isr_cache = -1;
 	if (vcpu->arch.apicv_active) {
+		if (kvm_x86_ops->apicv_post_state_restore)
+			kvm_x86_ops->apicv_post_state_restore(vcpu);
 		kvm_x86_ops->hwapic_irr_update(vcpu,
 				apic_find_highest_irr(apic));
-		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+		kvm_x86_ops->hwapic_isr_update(vcpu,
 				apic_find_highest_isr(apic));
 	}
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -2097,7 +2078,7 @@
 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
 		return;
 
-	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
+	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
 	max_irr = apic_find_highest_irr(apic);
 	if (max_irr < 0)
 		max_irr = 0;
@@ -2139,8 +2120,8 @@
 
 	/* if this is ICR write vector before command */
 	if (reg == APIC_ICR)
-		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
-	return apic_reg_write(apic, reg, (u32)data);
+		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+	return kvm_lapic_reg_write(apic, reg, (u32)data);
 }
 
 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
@@ -2157,10 +2138,10 @@
 		return 1;
 	}
 
-	if (apic_reg_read(apic, reg, 4, &low))
+	if (kvm_lapic_reg_read(apic, reg, 4, &low))
 		return 1;
 	if (reg == APIC_ICR)
-		apic_reg_read(apic, APIC_ICR2, 4, &high);
+		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
 
 	*data = (((u64)high) << 32) | low;
 
@@ -2176,8 +2157,8 @@
 
 	/* if this is ICR write vector before command */
 	if (reg == APIC_ICR)
-		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
-	return apic_reg_write(apic, reg, (u32)data);
+		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+	return kvm_lapic_reg_write(apic, reg, (u32)data);
 }
 
 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
@@ -2188,10 +2169,10 @@
 	if (!lapic_in_kernel(vcpu))
 		return 1;
 
-	if (apic_reg_read(apic, reg, 4, &low))
+	if (kvm_lapic_reg_read(apic, reg, 4, &low))
 		return 1;
 	if (reg == APIC_ICR)
-		apic_reg_read(apic, APIC_ICR2, 4, &high);
+		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
 
 	*data = (((u64)high) << 32) | low;
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f71183e..891c6da 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -7,6 +7,10 @@
 
 #define KVM_APIC_INIT		0
 #define KVM_APIC_SIPI		1
+#define KVM_APIC_LVT_NUM	6
+
+#define KVM_APIC_SHORT_MASK	0xc0000
+#define KVM_APIC_DEST_MASK	0x800
 
 struct kvm_timer {
 	struct hrtimer timer;
@@ -59,6 +63,11 @@
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
+int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val);
+int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+		       void *data);
+bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+			   int short_hand, unsigned int dest, int dest_mode);
 
 void __kvm_apic_update_irr(u32 *pir, void *regs);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
@@ -99,9 +108,32 @@
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
 void kvm_lapic_init(void);
 
-static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
+#define VEC_POS(v) ((v) & (32 - 1))
+#define REG_POS(v) (((v) >> 5) << 4)
+
+static inline void kvm_lapic_set_vector(int vec, void *bitmap)
 {
-	        return *((u32 *) (apic->regs + reg_off));
+	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic)
+{
+	kvm_lapic_set_vector(vec, apic->regs + APIC_IRR);
+	/*
+	 * irr_pending must be true if any interrupt is pending; set it after
+	 * APIC_IRR to avoid race with apic_clear_irr
+	 */
+	apic->irr_pending = true;
+}
+
+static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+	return *((u32 *) (apic->regs + reg_off));
+}
+
+static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
+{
+	*((u32 *) (apic->regs + reg_off)) = val;
 }
 
 extern struct static_key kvm_no_apic_vcpu;
@@ -169,7 +201,7 @@
 
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
-	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+	return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 38c0c32..24e8001 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1909,18 +1909,17 @@
  * since it has been deleted from active_mmu_pages but still can be found
  * at hast list.
  *
- * for_each_gfn_indirect_valid_sp has skipped that kind of page and
- * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
- * all the obsolete pages.
+ * for_each_gfn_valid_sp() skips that kind of page.
  */
-#define for_each_gfn_sp(_kvm, _sp, _gfn)				\
+#define for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-		if ((_sp)->gfn != (_gfn)) {} else
+		if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \
+			|| (_sp)->role.invalid) {} else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)			\
-	for_each_gfn_sp(_kvm, _sp, _gfn)				\
-		if ((_sp)->role.direct || (_sp)->role.invalid) {} else
+	for_each_gfn_valid_sp(_kvm, _sp, _gfn)				\
+		if ((_sp)->role.direct) {} else
 
 /* @sp->gfn should be write-protected at the call site */
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -1961,6 +1960,11 @@
 static void mmu_audit_disable(void) { }
 #endif
 
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
 static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			 struct list_head *invalid_list)
 {
@@ -2105,11 +2109,6 @@
 	__clear_sp_write_flooding_count(sp);
 }
 
-static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
-}
-
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -2136,10 +2135,7 @@
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
-	for_each_gfn_sp(vcpu->kvm, sp, gfn) {
-		if (is_obsolete_sp(vcpu->kvm, sp))
-			continue;
-
+	for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
 		if (!need_sync && sp->unsync)
 			need_sync = true;
 
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 3f8c732..c146f3c 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -44,8 +44,6 @@
 	case MSR_MTRRdefType:
 	case MSR_IA32_CR_PAT:
 		return true;
-	case 0x2f8:
-		return true;
 	}
 	return false;
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fafd720..2214214 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -14,6 +14,9 @@
  * the COPYING file in the top-level directory.
  *
  */
+
+#define pr_fmt(fmt) "SVM: " fmt
+
 #include <linux/kvm_host.h>
 
 #include "irq.h"
@@ -32,6 +35,7 @@
 #include <linux/trace_events.h>
 #include <linux/slab.h>
 
+#include <asm/apic.h>
 #include <asm/perf_event.h>
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
@@ -68,6 +72,8 @@
 #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
 
+#define SVM_AVIC_DOORBELL	0xc001011b
+
 #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
 #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
 #define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
@@ -78,6 +84,18 @@
 #define TSC_RATIO_MIN		0x0000000000000001ULL
 #define TSC_RATIO_MAX		0x000000ffffffffffULL
 
+#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)	/* keeps bits 51:12 */
+
+/*
+ * 0xff is broadcast, so the max index allowed for physical APIC ID
+ * table is 0xfe.  APIC IDs above 0xff are reserved.
+ */
+#define AVIC_MAX_PHYSICAL_ID_COUNT	255
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF
+
 static bool erratum_383_found __read_mostly;
 
 static const u32 host_save_user_msrs[] = {
@@ -162,8 +180,21 @@
 
 	/* cached guest cpuid flags for faster access */
 	bool nrips_enabled	: 1;
+
+	u32 ldr_reg;
+	struct page *avic_backing_page;
+	u64 *avic_physical_id_cache;
+	bool avic_is_running;
 };
 
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	(0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)
+
 static DEFINE_PER_CPU(u64, current_tsc_ratio);
 #define TSC_RATIO_DEFAULT	0x0100000000ULL
 
@@ -205,6 +236,10 @@
 static int nested = true;
 module_param(nested, int, S_IRUGO);
 
+/* enable / disable AVIC */
+static int avic;
+module_param(avic, int, S_IRUGO);
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -228,12 +263,18 @@
 	VMCB_SEG,        /* CS, DS, SS, ES, CPL */
 	VMCB_CR2,        /* CR2 only */
 	VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
+	VMCB_AVIC,       /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
+			  * AVIC PHYSICAL_TABLE pointer,
+			  * AVIC LOGICAL_TABLE pointer
+			  */
 	VMCB_DIRTY_MAX,
 };
 
 /* TPR and CR2 are always written before VMRUN */
 #define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))
 
+#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
+
 static inline void mark_all_dirty(struct vmcb *vmcb)
 {
 	vmcb->control.clean = 0;
@@ -255,6 +296,23 @@
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
+static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
+{
+	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
+	mark_dirty(svm->vmcb, VMCB_AVIC);
+}
+
+static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 *entry = svm->avic_physical_id_cache;
+
+	if (!entry)
+		return false;
+
+	return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+}
+
 static void recalc_intercepts(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *c, *h;
@@ -923,6 +981,12 @@
 	} else
 		kvm_disable_tdp();
 
+	if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
+		avic = false;
+
+	if (avic)
+		pr_info("AVIC enabled\n");
+
 	return 0;
 
 err:
@@ -1000,6 +1064,22 @@
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
+static void avic_init_vmcb(struct vcpu_svm *svm)
+{
+	struct vmcb *vmcb = svm->vmcb;
+	struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+	phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
+	phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
+	phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
+
+	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
+	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
+	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
+	vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
+	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
+	svm->vcpu.arch.apicv_active = true;
+}
+
 static void init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1014,7 +1094,8 @@
 	set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
 	set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 	set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
-	set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+	if (!kvm_vcpu_apicv_active(&svm->vcpu))
+		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 
 	set_dr_intercepts(svm);
 
@@ -1110,9 +1191,197 @@
 		set_intercept(svm, INTERCEPT_PAUSE);
 	}
 
+	if (avic)
+		avic_init_vmcb(svm);
+
 	mark_all_dirty(svm->vmcb);
 
 	enable_gif(svm);
+
+}
+
+static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
+{
+	struct page *table_page = vcpu->kvm->arch.avic_physical_id_table_page;
+	u64 *table;
+
+	/* Indices at or beyond AVIC_MAX_PHYSICAL_ID_COUNT have no slot. */
+	if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
+		return NULL;
+
+	table = page_address(table_page);
+	return table + index;
+}
+
+/**
+ * avic_init_access_page - set up the APIC access page memslot, once per VM.
+ *
+ * AVIC hardware walks the nested page table to check permissions, but it
+ * does not use the SPA from the leaf page table entry; it uses the address
+ * in the AVIC_BACKING_PAGE pointer field of the VMCB instead. Therefore, we
+ * set up the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here. Returns 0 or error.
+ */
+static int avic_init_access_page(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int ret;
+
+	if (kvm->arch.apic_access_page_done)	/* already set up for this VM */
+		return 0;
+
+	ret = x86_set_memory_region(kvm,
+				    APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+				    APIC_DEFAULT_PHYS_BASE,
+				    PAGE_SIZE);
+	if (ret)
+		return ret;
+
+	kvm->arch.apic_access_page_done = true;
+	return 0;
+}
+
+/* Install the APIC register page in the physical APIC ID table; 0 or -errno. */
+static int avic_init_backing_page(struct kvm_vcpu *vcpu)
+{
+	int ret;
+	u64 *entry, new_entry;
+	int id = vcpu->vcpu_id;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	ret = avic_init_access_page(vcpu);
+	if (ret)
+		return ret;
+
+	if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
+		return -EINVAL;
+
+	if (!svm->vcpu.arch.apic->regs)
+		return -EINVAL;
+
+	svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
+
+	/* Build a fresh entry: backing page address plus the valid bit. */
+	entry = avic_get_physical_id_entry(vcpu, id);
+	if (!entry)
+		return -EINVAL;
+
+	new_entry = (page_to_phys(svm->avic_backing_page) &
+		     AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
+		     AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
+	WRITE_ONCE(*entry, new_entry);
+
+	svm->avic_physical_id_cache = entry;	/* reused by later entry updates */
+
+	return 0;
+}
+
+static void avic_vm_destroy(struct kvm *kvm)
+{	/* Release the per-VM AVIC logical and physical APIC ID table pages. */
+	struct kvm_arch *vm_data = &kvm->arch;
+
+	if (vm_data->avic_logical_id_table_page)	/* free only if set */
+		__free_page(vm_data->avic_logical_id_table_page);
+	if (vm_data->avic_physical_id_table_page)	/* free only if set */
+		__free_page(vm_data->avic_physical_id_table_page);
+}
+
+static int avic_vm_init(struct kvm *kvm)
+{
+	struct kvm_arch *arch = &kvm->arch;
+	struct page *phys_table;
+	struct page *logical_table;
+
+	/* Nothing to set up unless AVIC is enabled. */
+	if (!avic)
+		return 0;
+
+	/* Physical APIC ID table: one zeroed 4KB page. */
+	phys_table = alloc_page(GFP_KERNEL);
+	if (!phys_table)
+		goto out_free;
+
+	arch->avic_physical_id_table_page = phys_table;
+	clear_page(page_address(phys_table));
+
+	/* Logical APIC ID table: one zeroed 4KB page. */
+	logical_table = alloc_page(GFP_KERNEL);
+	if (!logical_table)
+		goto out_free;
+
+	arch->avic_logical_id_table_page = logical_table;
+	clear_page(page_address(logical_table));
+
+	return 0;
+
+out_free:
+	avic_vm_destroy(kvm);
+	return -ENOMEM;
+}
+
+/**
+ * Called during VCPU halt/unhalt; records @is_run and updates IS_RUNNING.
+ */
+static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+{
+	u64 entry;
+	int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu);
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!kvm_vcpu_apicv_active(vcpu))
+		return;
+
+	svm->avic_is_running = is_run;	/* consulted later by avic_vcpu_load() */
+
+	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
+	if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
+		return;
+
+	entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)); /* bit already matches */
+
+	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+	if (is_run)
+		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+}
+
+static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{	/* Refresh this vCPU's physical APIC ID entry for host CPU @cpu. */
+	u64 entry;
+	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
+	int h_physical_id = __default_cpu_present_to_apicid(cpu);
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!kvm_vcpu_apicv_active(vcpu))
+		return;
+
+	if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
+		return;
+
+	entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+	/* Replace the host physical ID field with the APIC ID of @cpu. */
+	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
+	/* Set IS_RUNNING only when svm->avic_is_running is true. */
+	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+	if (svm->avic_is_running)
+		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+
+	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+}
+
+static void avic_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	u64 entry;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!kvm_vcpu_apicv_active(vcpu))
+		return;
+	/* Clear IS_RUNNING in the cached physical APIC ID entry. */
+	entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
 
 static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -1131,6 +1400,9 @@
 
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
+
+	if (kvm_vcpu_apicv_active(vcpu) && !init_event)
+		avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
 }
 
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -1169,6 +1441,17 @@
 	if (!hsave_page)
 		goto free_page3;
 
+	if (avic) {
+		err = avic_init_backing_page(&svm->vcpu);
+		if (err)
+			goto free_page4;
+	}
+
+	/* We initialize this flag to true to make sure that the is_running
+	 * bit would be set the first time the vcpu is loaded.
+	 */
+	svm->avic_is_running = true;
+
 	svm->nested.hsave = page_address(hsave_page);
 
 	svm->msrpm = page_address(msrpm_pages);
@@ -1187,6 +1470,8 @@
 
 	return &svm->vcpu;
 
+free_page4:
+	__free_page(hsave_page);
 free_page3:
 	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
 free_page2:
@@ -1243,6 +1528,8 @@
 	/* This assumes that the kernel never uses MSR_TSC_AUX */
 	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+
+	avic_vcpu_load(vcpu, cpu);
 }
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1250,6 +1537,8 @@
 	struct vcpu_svm *svm = to_svm(vcpu);
 	int i;
 
+	avic_vcpu_put(vcpu);
+
 	++vcpu->stat.host_state_reload;
 	kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
@@ -1265,6 +1554,16 @@
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 }
 
+static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
+{	/* Installed as the .vcpu_blocking hook in svm_x86_ops. */
+	avic_set_running(vcpu, false);	/* clear the running state */
+}
+
+static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{	/* Installed as the .vcpu_unblocking hook in svm_x86_ops. */
+	avic_set_running(vcpu, true);	/* set the running state again */
+}
+
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 {
 	return to_svm(vcpu)->vmcb->save.rflags;
@@ -2673,10 +2972,11 @@
 	disable_gif(svm);
 
 	/* After a CLGI no interrupts should come */
-	svm_clear_vintr(svm);
-	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
-
-	mark_dirty(svm->vmcb, VMCB_INTR);
+	if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
+		svm_clear_vintr(svm);
+		svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+		mark_dirty(svm->vmcb, VMCB_INTR);
+	}
 
 	return 1;
 }
@@ -3212,6 +3512,10 @@
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
+	case MSR_IA32_APICBASE:
+		if (kvm_vcpu_apicv_active(vcpu))
+			avic_update_vapic_bar(to_svm(vcpu), data);
+		/* Fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -3281,6 +3585,278 @@
 	return nop_interception(svm);
 }
 
+enum avic_ipi_failure_cause {	/* "id" values switched on by the incomplete-IPI handler */
+	AVIC_IPI_FAILURE_INVALID_INT_TYPE,	/* handled by replaying the ICR2/ICR writes */
+	AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,	/* handled by waking matching vCPUs */
+	AVIC_IPI_FAILURE_INVALID_TARGET,	/* handler takes no action */
+	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,	/* handler warns once */
+};
+
+/* Handle an AVIC incomplete-IPI #VMEXIT according to its cause; returns 1. */
+static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
+{
+	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
+	u32 icrl = svm->vmcb->control.exit_info_1;
+	u32 id = svm->vmcb->control.exit_info_2 >> 32;
+	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;	/* low byte only */
+	struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
+
+	switch (id) {
+	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
+		/*
+		 * AVIC hardware handles the generation of
+		 * IPIs when the specified Message Type is Fixed
+		 * (also known as fixed delivery mode) and
+		 * the Trigger Mode is edge-triggered. The hardware
+		 * also supports self and broadcast delivery modes
+		 * specified via the Destination Shorthand(DSH)
+		 * field of the ICRL. Logical and physical APIC ID
+		 * formats are supported. All other IPI types cause
+		 * a #VMEXIT, which needs to be emulated.
+		 */
+		kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+		break;
+	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+		int i;
+		struct kvm_vcpu *vcpu;
+		struct kvm *kvm = svm->vcpu.kvm;
+
+		/*
+		 * At this point, we expect that the AVIC HW has already
+		 * set the appropriate IRR bits on the valid target
+		 * vcpus. So, we just need to kick the appropriate vcpu.
+		 */
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			bool m = kvm_apic_match_dest(vcpu, apic,
+						     icrl & KVM_APIC_SHORT_MASK,
+						     GET_APIC_DEST_FIELD(icrh),
+						     icrl & KVM_APIC_DEST_MASK);
+
+			if (m && !avic_vcpu_is_running(vcpu))
+				kvm_vcpu_wake_up(vcpu);
+		}
+		break;
+	}
+	case AVIC_IPI_FAILURE_INVALID_TARGET:
+		break;
+	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+		WARN_ONCE(1, "Invalid backing page\n");
+		break;
+	default:
+		pr_err("Unknown IPI interception\n");
+	}
+
+	return 1;
+}
+
+static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
+{	/* Map @ldr to its slot in the logical APIC ID table; NULL if it has none. */
+	struct kvm_arch *vm_data = &vcpu->kvm->arch;
+	int index;
+	u32 *logical_apic_id_table;
+	int dlid = GET_APIC_LOGICAL_ID(ldr);
+
+	if (!dlid)	/* a zero logical ID has no slot */
+		return NULL;
+
+	if (flat) { /* flat: index is the lowest set bit of the logical ID */
+		index = ffs(dlid) - 1;
+		if (index > 7)
+			return NULL;
+	} else { /* cluster: high nibble is the cluster, low nibble picks the APIC */
+		int cluster = (dlid & 0xf0) >> 4;
+		int apic = ffs(dlid & 0x0f) - 1;	/* -1 when the low nibble is zero */
+
+		if ((apic < 0) || (apic > 7) ||
+		    (cluster >= 0xf))
+			return NULL;
+		index = (cluster << 2) + apic;
+	}
+
+	logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+
+	return &logical_apic_id_table[index];
+}
+
+static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
+			  bool valid)
+{
+	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
+	u32 *slot = avic_get_logical_id_entry(vcpu, ldr, dfr == APIC_DFR_FLAT);
+	u32 val;
+
+	if (!slot)
+		return -EINVAL;
+
+	/* Rewrite the guest physical ID field, then set or clear VALID. */
+	val = READ_ONCE(*slot);
+	val &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+	val |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
+	if (valid)
+		val |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+	else
+		val &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+	WRITE_ONCE(*slot, val);
+
+	return 0;
+}
+
+static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
+{	/* Sync the logical APIC ID table with the APIC's LDR; non-zero on failure. */
+	int ret;
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
+
+	if (!ldr)
+		return 1;
+	/* Write the new entry; on failure invalidate the one recorded earlier. */
+	ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
+	if (ret && svm->ldr_reg) {
+		avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
+		svm->ldr_reg = 0;
+	} else { /* NOTE(review): also reached when ret != 0 and ldr_reg == 0 - confirm */
+		svm->ldr_reg = ldr;
+	}
+	return ret;
+}
+
+static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 id = (kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID) >> 24) & 0xff;
+	u64 *src, *dst;
+
+	/* Nothing to move while the APIC ID still equals the vcpu_id. */
+	if (id == vcpu->vcpu_id)
+		return 0;
+
+	src = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
+	dst = avic_get_physical_id_entry(vcpu, id);
+	if (!src || !dst)
+		return 1;
+
+	/* Relocate the physical ID entry to the slot of the new APIC ID. */
+	*dst = *src;
+	*src = 0ULL;
+	svm->avic_physical_id_cache = dst;
+
+	/*
+	 * If the LDR has already been set up, also refresh the guest
+	 * physical APIC ID held in the logical APIC ID table entry.
+	 */
+	if (svm->ldr_reg)
+		avic_handle_ldr_update(vcpu);
+
+	return 0;
+}
+
+static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
+{	/* React to a DFR change by resetting the logical APIC ID table; returns 0. */
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_arch *vm_data = &vcpu->kvm->arch;
+	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
+	u32 mod = (dfr >> 28) & 0xf;	/* top four bits of DFR */
+
+	/*
+	 * We assume that all local APICs are using the same type.
+	 * If this changes, we need to flush the AVIC logical
+	 * APIC ID table.
+	 */
+	if (vm_data->ldr_mode == mod)
+		return 0;
+
+	clear_page(page_address(vm_data->avic_logical_id_table_page));
+	vm_data->ldr_mode = mod;
+
+	if (svm->ldr_reg)	/* re-add this vCPU's entry to the cleared table */
+		avic_handle_ldr_update(vcpu);
+	return 0;
+}
+
+static int avic_unaccel_trap_write(struct vcpu_svm *svm)
+{	/* Handle a trapped APIC register write; 0 if a table update failed, else 1. */
+	struct kvm_lapic *apic = svm->vcpu.arch.apic;
+	u32 offset = svm->vmcb->control.exit_info_1 &
+				AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+	/* APIC_ID, APIC_LDR and APIC_DFR writes also update the AVIC ID tables. */
+	switch (offset) {
+	case APIC_ID:
+		if (avic_handle_apic_id_update(&svm->vcpu))
+			return 0;
+		break;
+	case APIC_LDR:
+		if (avic_handle_ldr_update(&svm->vcpu))
+			return 0;
+		break;
+	case APIC_DFR:
+		avic_handle_dfr_update(&svm->vcpu);
+		break;
+	default:
+		break;
+	}
+	/* Replay the register's current value through the APIC write path. */
+	kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
+
+	return 1;
+}
+
+static bool is_avic_unaccelerated_access_trap(u32 offset)
+{
+	/*
+	 * Accesses to the APIC register offsets listed here are
+	 * classified as traps; every other offset is not.
+	 */
+	switch (offset) {
+	case APIC_ID:
+	case APIC_EOI:
+	case APIC_RRR:
+	case APIC_LDR:
+	case APIC_DFR:
+	case APIC_SPIV:
+	case APIC_ESR:
+	case APIC_ICR:
+	case APIC_LVTT:
+	case APIC_LVTTHMR:
+	case APIC_LVTPC:
+	case APIC_LVT0:
+	case APIC_LVT1:
+	case APIC_LVTERR:
+	case APIC_TMICT:
+	case APIC_TDCR:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
+{	/* Dispatch an unaccelerated APIC access exit as either a trap or a fault. */
+	int ret = 0;
+	u32 offset = svm->vmcb->control.exit_info_1 &
+		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+	u32 vector = svm->vmcb->control.exit_info_2 &
+		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;	/* only passed to the tracepoint */
+	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
+		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
+	bool trap = is_avic_unaccelerated_access_trap(offset);
+
+	trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
+					    trap, write, vector);
+	if (trap) {
+		/* Trap: a read here is unexpected and warned about once. */
+		WARN_ONCE(!write, "svm: Handling trap read.\n");
+		ret = avic_unaccel_trap_write(svm);
+	} else {
+		/* Fault: emulate the instruction; ret is 1 only on EMULATE_DONE. */
+		ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+	}
+
+	return ret;
+}
+
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
@@ -3344,6 +3920,8 @@
 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
 	[SVM_EXIT_NPF]				= pf_interception,
 	[SVM_EXIT_RSM]                          = emulate_on_interception,
+	[SVM_EXIT_AVIC_INCOMPLETE_IPI]		= avic_incomplete_ipi_interception,
+	[SVM_EXIT_AVIC_UNACCELERATED_ACCESS]	= avic_unaccelerated_access_interception,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -3375,10 +3953,14 @@
 	pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
 	pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
 	pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
+	pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
 	pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
 	pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
 	pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
 	pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
+	pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
+	pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
+	pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
 	pr_err("VMCB State Save Area:\n");
 	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
 	       "es:",
@@ -3562,6 +4144,7 @@
 {
 	struct vmcb_control_area *control;
 
+	/* The following fields are ignored when AVIC is enabled */
 	control = &svm->vmcb->control;
 	control->int_vector = irq;
 	control->int_ctl &= ~V_INTR_PRIO_MASK;
@@ -3583,11 +4166,17 @@
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
+static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
+{	/* True when @vcpu is in guest mode with HF_VINTR_MASK set in hflags. */
+	return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
+}
+
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
+	if (svm_nested_virtualize_tpr(vcpu) ||
+	    kvm_vcpu_apicv_active(vcpu))
 		return;
 
 	clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
@@ -3606,11 +4195,28 @@
 
 static bool svm_get_enable_apicv(void)
 {
-	return false;
+	return avic;
 }
 
+static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
+{	/* Empty body: installed as .hwapic_irr_update, does nothing. */
+}
+
+static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
+{	/* Empty body: installed as .hwapic_isr_update, does nothing. */
+}
+
+/* Note: Currently only used by Hyper-V. */
 static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+
+	if (!avic)
+		return;
+
+	vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
+	mark_dirty(vmcb, VMCB_INTR);
 }
 
 static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -3623,6 +4229,18 @@
 	return;
 }
 
+static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+{	/* Post vector @vec to @vcpu: set its IRR bit, then notify the vCPU. */
+	kvm_lapic_set_irr(vec, vcpu->arch.apic);
+	smp_mb__after_atomic();
+	/* Running: write the host APIC ID to the doorbell MSR; otherwise wake it. */
+	if (avic_vcpu_is_running(vcpu))
+		wrmsrl(SVM_AVIC_DOORBELL,
+		       __default_cpu_present_to_apicid(vcpu->cpu));
+	else
+		kvm_vcpu_wake_up(vcpu);
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3677,6 +4295,9 @@
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	if (kvm_vcpu_apicv_active(vcpu))
+		return;
+
 	/*
 	 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
 	 * 1, because that's a separate STGI/VMRUN intercept.  The next time we
@@ -3728,7 +4349,7 @@
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
+	if (svm_nested_virtualize_tpr(vcpu))
 		return;
 
 	if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
@@ -3742,7 +4363,8 @@
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 cr8;
 
-	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
+	if (svm_nested_virtualize_tpr(vcpu) ||
+	    kvm_vcpu_apicv_active(vcpu))
 		return;
 
 	cr8 = kvm_get_cr8(vcpu);
@@ -4045,14 +4667,26 @@
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_cpuid_entry2 *entry;
 
 	/* Update nrips enabled cache */
 	svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
+
+	if (!kvm_vcpu_apicv_active(vcpu))
+		return;
+
+	entry = kvm_find_cpuid_entry(vcpu, 1, 0);
+	if (entry)
+		entry->ecx &= ~bit(X86_FEATURE_X2APIC);
 }
 
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
 	switch (func) {
+	case 0x1:
+		if (avic)
+			entry->ecx &= ~bit(X86_FEATURE_X2APIC);
+		break;
 	case 0x80000001:
 		if (nested)
 			entry->ecx |= (1 << 2); /* Set SVM bit */
@@ -4307,6 +4941,15 @@
 {
 }
 
+static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
+{
+	/* Replay APIC ID, DFR, then LDR; stop at the first failing step. */
+	if (avic_handle_apic_id_update(vcpu) ||
+	    avic_handle_dfr_update(vcpu))
+		return;
+	avic_handle_ldr_update(vcpu);
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -4322,9 +4965,14 @@
 	.vcpu_free = svm_free_vcpu,
 	.vcpu_reset = svm_vcpu_reset,
 
+	.vm_init = avic_vm_init,
+	.vm_destroy = avic_vm_destroy,
+
 	.prepare_guest_switch = svm_prepare_guest_switch,
 	.vcpu_load = svm_vcpu_load,
 	.vcpu_put = svm_vcpu_put,
+	.vcpu_blocking = svm_vcpu_blocking,
+	.vcpu_unblocking = svm_vcpu_unblocking,
 
 	.update_bp_intercept = update_bp_intercept,
 	.get_msr = svm_get_msr,
@@ -4382,6 +5030,9 @@
 	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.sync_pir_to_irr = svm_sync_pir_to_irr,
+	.hwapic_irr_update = svm_hwapic_irr_update,
+	.hwapic_isr_update = svm_hwapic_isr_update,
+	.apicv_post_state_restore = avic_post_state_restore,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
@@ -4415,6 +5066,7 @@
 	.sched_in = svm_sched_in,
 
 	.pmu_ops = &amd_pmu_ops,
+	.deliver_posted_interrupt = svm_deliver_avic_intr,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b72743c..8de9250 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1291,6 +1291,63 @@
 		  __entry->vcpu_id, __entry->timer_index)
 );
 
+/*
+ * Tracepoints for AMD AVIC; the TP_printk() format needs no trailing newline.
+ */
+TRACE_EVENT(kvm_avic_incomplete_ipi,
+	    TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
+	    TP_ARGS(vcpu, icrh, icrl, id, index),
+
+	TP_STRUCT__entry(
+		__field(u32, vcpu)
+		__field(u32, icrh)
+		__field(u32, icrl)
+		__field(u32, id)
+		__field(u32, index)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu = vcpu;
+		__entry->icrh = icrh;
+		__entry->icrl = icrl;
+		__entry->id = id;
+		__entry->index = index;
+	),
+
+	TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u",
+		  __entry->vcpu, __entry->icrh, __entry->icrl,
+		  __entry->id, __entry->index)
+);
+
+TRACE_EVENT(kvm_avic_unaccelerated_access,
+	    TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec),
+	    TP_ARGS(vcpu, offset, ft, rw, vec),
+
+	TP_STRUCT__entry(
+		__field(u32, vcpu)
+		__field(u32, offset)
+		__field(bool, ft)	/* printed as "trap" when true, else "fault" */
+		__field(bool, rw)	/* printed as "write" when true, else "read" */
+		__field(u32, vec)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu = vcpu;
+		__entry->offset = offset;
+		__entry->ft = ft;
+		__entry->rw = rw;
+		__entry->vec = vec;
+	),
+
+	TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x",
+		  __entry->vcpu,
+		  __entry->offset,
+		  __print_symbolic(__entry->offset, kvm_trace_symbol_apic),
+		  __entry->ft ? "trap" : "fault",
+		  __entry->rw ? "write" : "read",
+		  __entry->vec)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cb47fe3..e605d1e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5050,8 +5050,8 @@
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
 	cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vmx_set_cr0(vcpu, cr0); /* enter rmode */
 	vmx->vcpu.arch.cr0 = cr0;
+	vmx_set_cr0(vcpu, cr0); /* enter rmode */
 	vmx_set_cr4(vcpu, 0);
 	vmx_set_efer(vcpu, 0);
 	vmx_fpu_activate(vcpu);
@@ -8318,19 +8318,19 @@
 		vmcs_write64(APIC_ACCESS_ADDR, hpa);
 }
 
-static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
+static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 {
 	u16 status;
 	u8 old;
 
-	if (isr == -1)
-		isr = 0;
+	if (max_isr == -1)
+		max_isr = 0;
 
 	status = vmcs_read16(GUEST_INTR_STATUS);
 	old = status >> 8;
-	if (isr != old) {
+	if (max_isr != old) {
 		status &= 0xff;
-		status |= isr << 8;
+		status |= max_isr << 8;
 		vmcs_write16(GUEST_INTR_STATUS, status);
 	}
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 12f33e6..c805cf4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -161,6 +161,7 @@
 	{ "halt_exits", VCPU_STAT(halt_exits) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
@@ -2002,22 +2003,8 @@
 	vcpu->arch.pv_time_enabled = false;
 }
 
-static void accumulate_steal_time(struct kvm_vcpu *vcpu)
-{
-	u64 delta;
-
-	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
-		return;
-
-	delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
-	vcpu->arch.st.last_steal = current->sched_info.run_delay;
-	vcpu->arch.st.accum_steal = delta;
-}
-
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-	accumulate_steal_time(vcpu);
-
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
@@ -2025,9 +2012,26 @@
 		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
 		return;
 
-	vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
-	vcpu->arch.st.steal.version += 2;
-	vcpu->arch.st.accum_steal = 0;
+	if (vcpu->arch.st.steal.version & 1)
+		vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
+
+	vcpu->arch.st.steal.version += 1;
+
+	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+	smp_wmb();
+
+	vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+		vcpu->arch.st.last_steal;
+	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+
+	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+	smp_wmb();
+
+	vcpu->arch.st.steal.version += 1;
 
 	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
 		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
@@ -7752,6 +7756,9 @@
 	kvm_page_track_init(kvm);
 	kvm_mmu_init_vm(kvm);
 
+	if (kvm_x86_ops->vm_init)
+		return kvm_x86_ops->vm_init(kvm);
+
 	return 0;
 }
 
@@ -7873,6 +7880,8 @@
 		x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
 		x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
 	}
+	if (kvm_x86_ops->vm_destroy)
+		kvm_x86_ops->vm_destroy(kvm);
 	kvm_iommu_unmap_guest(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
@@ -8355,19 +8364,21 @@
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+bool kvm_arch_has_irq_bypass(void)
+{
+	return !!kvm_x86_ops->update_pi_irte;	/* true iff the hook is provided */
+}
+
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
 				      struct irq_bypass_producer *prod)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-	if (kvm_x86_ops->update_pi_irte) {
-		irqfd->producer = prod;
-		return kvm_x86_ops->update_pi_irte(irqfd->kvm,
-				prod->irq, irqfd->gsi, 1);
-	}
+	irqfd->producer = prod;
 
-	return -EINVAL;
+	return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+					   prod->irq, irqfd->gsi, 1);
 }
 
 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -8377,11 +8388,6 @@
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-	if (!kvm_x86_ops->update_pi_irte) {
-		WARN_ON(irqfd->producer != NULL);
-		return;
-	}
-
 	WARN_ON(irqfd->producer != prod);
 	irqfd->producer = NULL;
 
@@ -8429,3 +8435,5 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 381a43c..8196054 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -516,7 +516,7 @@
 
 int __init pcibios_init(void)
 {
-	if (!raw_pci_ops) {
+	if (!raw_pci_ops && !raw_pci_ext_ops) {
 		printk(KERN_WARNING "PCI: System does not support PCI\n");
 		return 0;
 	}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b7de192..837ea36 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -552,9 +552,16 @@
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
 
+/*
+ * Broadwell EP Home Agent BARs erroneously return non-zero values when read.
+ *
+ * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
+ * entry BDF2.
+ */
 static void pci_bdwep_bar(struct pci_dev *dev)
 {
 	dev->non_compliant_bars = 1;
 }
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar);
diff --git a/certs/Kconfig b/certs/Kconfig
index f0f8a44..fc5955f 100644
--- a/certs/Kconfig
+++ b/certs/Kconfig
@@ -17,6 +17,7 @@
 config SYSTEM_TRUSTED_KEYRING
 	bool "Provide system-wide ring of trusted keys"
 	depends on KEYS
+	depends on ASYMMETRIC_KEY_TYPE
 	help
 	  Provide a system keyring to which trusted keys can be added.  Keys in
 	  the keyring are considered to be trusted.  Keys may be added at will
@@ -55,4 +56,12 @@
 	  This is the number of bytes reserved in the kernel image for a
 	  certificate to be inserted.
 
+config SECONDARY_TRUSTED_KEYRING
+	bool "Provide a keyring to which extra trustable keys may be added"
+	depends on SYSTEM_TRUSTED_KEYRING
+	help
+	  If set, provide a keyring to which extra keys may be added, provided
+	  those keys are not blacklisted and are vouched for by a key built
+	  into the kernel or already in the secondary trusted keyring.
+
 endmenu
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index f418032..50979d6 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -18,29 +18,88 @@
 #include <keys/system_keyring.h>
 #include <crypto/pkcs7.h>
 
-struct key *system_trusted_keyring;
-EXPORT_SYMBOL_GPL(system_trusted_keyring);
+static struct key *builtin_trusted_keys;
+#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
+static struct key *secondary_trusted_keys;
+#endif
 
 extern __initconst const u8 system_certificate_list[];
 extern __initconst const unsigned long system_certificate_list_size;
 
+/**
+ * restrict_link_by_builtin_trusted - Restrict keyring addition by built in CA
+ *
+ * Restrict the addition of keys into a keyring based on the key-to-be-added
+ * being vouched for by a key in the built in system keyring.
+ */
+int restrict_link_by_builtin_trusted(struct key *keyring,
+				     const struct key_type *type,
+				     const union key_payload *payload)
+{
+	return restrict_link_by_signature(builtin_trusted_keys, type, payload);
+}
+
+#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
+/**
+ * restrict_link_by_builtin_and_secondary_trusted - Restrict keyring
+ *   addition by both builtin and secondary keyrings
+ *
+ * Restrict the addition of keys into a keyring based on the key-to-be-added
+ * being vouched for by a key in either the built-in or the secondary system
+ * keyrings.
+ */
+int restrict_link_by_builtin_and_secondary_trusted(
+	struct key *keyring,
+	const struct key_type *type,
+	const union key_payload *payload)
+{
+	/* If we have a secondary trusted keyring, then that contains a link
+	 * through to the builtin keyring and the search will follow that link.
+	 */
+	if (type == &key_type_keyring &&
+	    keyring == secondary_trusted_keys &&
+	    payload == &builtin_trusted_keys->payload)
+		/* Allow the builtin keyring itself to be linked into the secondary */
+		return 0;
+	/* Anything else is passed to restrict_link_by_signature() on the secondary. */
+	return restrict_link_by_signature(secondary_trusted_keys, type, payload);
+}
+#endif
+
 /*
- * Load the compiled-in keys
+ * Create the trusted keyrings
  */
 static __init int system_trusted_keyring_init(void)
 {
-	pr_notice("Initialise system trusted keyring\n");
+	pr_notice("Initialise system trusted keyrings\n");
 
-	system_trusted_keyring =
-		keyring_alloc(".system_keyring",
+	builtin_trusted_keys =
+		keyring_alloc(".builtin_trusted_keys",
 			      KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
 			      ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
 			      KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
-			      KEY_ALLOC_NOT_IN_QUOTA, NULL);
-	if (IS_ERR(system_trusted_keyring))
-		panic("Can't allocate system trusted keyring\n");
+			      KEY_ALLOC_NOT_IN_QUOTA,
+			      NULL, NULL);
+	if (IS_ERR(builtin_trusted_keys))
+		panic("Can't allocate builtin trusted keyring\n");
 
-	set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
+#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
+	secondary_trusted_keys =
+		keyring_alloc(".secondary_trusted_keys",
+			      KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+			      ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+			       KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
+			       KEY_USR_WRITE),
+			      KEY_ALLOC_NOT_IN_QUOTA,
+			      restrict_link_by_builtin_and_secondary_trusted,
+			      NULL);
+	if (IS_ERR(secondary_trusted_keys))
+		panic("Can't allocate secondary trusted keyring\n");
+
+	if (key_link(secondary_trusted_keys, builtin_trusted_keys) < 0)
+		panic("Can't link trusted keyrings\n");
+#endif
+
 	return 0;
 }
 
@@ -76,7 +135,7 @@
 		if (plen > end - p)
 			goto dodgy_cert;
 
-		key = key_create_or_update(make_key_ref(system_trusted_keyring, 1),
+		key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1),
 					   "asymmetric",
 					   NULL,
 					   p,
@@ -84,8 +143,8 @@
 					   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
 					   KEY_USR_VIEW | KEY_USR_READ),
 					   KEY_ALLOC_NOT_IN_QUOTA |
-					   KEY_ALLOC_TRUSTED |
-					   KEY_ALLOC_BUILT_IN);
+					   KEY_ALLOC_BUILT_IN |
+					   KEY_ALLOC_BYPASS_RESTRICTION);
 		if (IS_ERR(key)) {
 			pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
 			       PTR_ERR(key));
@@ -108,19 +167,27 @@
 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 /**
- * Verify a PKCS#7-based signature on system data.
- * @data: The data to be verified.
+ * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
+ * @data: The data to be verified (NULL if expecting internal data).
  * @len: Size of @data.
  * @raw_pkcs7: The PKCS#7 message that is the signature.
  * @pkcs7_len: The size of @raw_pkcs7.
+ * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
+ *					(void *)1UL for all trusted keys).
  * @usage: The use to which the key is being put.
+ * @view_content: Callback to gain access to content.
+ * @ctx: Context for callback.
  */
-int system_verify_data(const void *data, unsigned long len,
-		       const void *raw_pkcs7, size_t pkcs7_len,
-		       enum key_being_used_for usage)
+int verify_pkcs7_signature(const void *data, size_t len,
+			   const void *raw_pkcs7, size_t pkcs7_len,
+			   struct key *trusted_keys,
+			   enum key_being_used_for usage,
+			   int (*view_content)(void *ctx,
+					       const void *data, size_t len,
+					       size_t asn1hdrlen),
+			   void *ctx)
 {
 	struct pkcs7_message *pkcs7;
-	bool trusted;
 	int ret;
 
 	pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len);
@@ -128,7 +195,7 @@
 		return PTR_ERR(pkcs7);
 
 	/* The data should be detached - so we need to supply it. */
-	if (pkcs7_supply_detached_data(pkcs7, data, len) < 0) {
+	if (data && pkcs7_supply_detached_data(pkcs7, data, len) < 0) {
 		pr_err("PKCS#7 signature with non-detached data\n");
 		ret = -EBADMSG;
 		goto error;
@@ -138,13 +205,33 @@
 	if (ret < 0)
 		goto error;
 
-	ret = pkcs7_validate_trust(pkcs7, system_trusted_keyring, &trusted);
-	if (ret < 0)
+	if (!trusted_keys) {
+		trusted_keys = builtin_trusted_keys;
+	} else if (trusted_keys == (void *)1UL) {
+#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
+		trusted_keys = secondary_trusted_keys;
+#else
+		trusted_keys = builtin_trusted_keys;
+#endif
+	}
+	ret = pkcs7_validate_trust(pkcs7, trusted_keys);
+	if (ret < 0) {
+		if (ret == -ENOKEY)
+			pr_err("PKCS#7 signature not signed with a trusted key\n");
 		goto error;
+	}
 
-	if (!trusted) {
-		pr_err("PKCS#7 signature not signed with a trusted key\n");
-		ret = -ENOKEY;
+	if (view_content) {
+		size_t asn1hdrlen;
+
+		ret = pkcs7_get_content_data(pkcs7, &data, &len, &asn1hdrlen);
+		if (ret < 0) {
+			if (ret == -ENODATA)
+				pr_devel("PKCS#7 message does not contain data\n");
+			goto error;
+		}
+
+		ret = view_content(ctx, data, len, asn1hdrlen);
 	}
 
 error:
@@ -152,6 +239,6 @@
 	pr_devel("<==%s() = %d\n", __func__, ret);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(system_verify_data);
+EXPORT_SYMBOL_GPL(verify_pkcs7_signature);
 
 #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 91a7e04..e28e912 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -1,5 +1,5 @@
 menuconfig ASYMMETRIC_KEY_TYPE
-	tristate "Asymmetric (public-key cryptographic) key type"
+	bool "Asymmetric (public-key cryptographic) key type"
 	depends on KEYS
 	help
 	  This option provides support for a key type that holds the data for
@@ -40,8 +40,7 @@
 
 config PKCS7_TEST_KEY
 	tristate "PKCS#7 testing key type"
-	depends on PKCS7_MESSAGE_PARSER
-	select SYSTEM_TRUSTED_KEYRING
+	depends on SYSTEM_DATA_VERIFICATION
 	help
 	  This option provides a type of key that can be loaded up from a
 	  PKCS#7 message - provided the message is signed by a trusted key.  If
@@ -54,6 +53,7 @@
 config SIGNED_PE_FILE_VERIFICATION
 	bool "Support for PE file signature verification"
 	depends on PKCS7_MESSAGE_PARSER=y
+	depends on SYSTEM_DATA_VERIFICATION
 	select ASN1
 	select OID_REGISTRY
 	help
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index f904862..6516855 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -4,7 +4,10 @@
 
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys.o
 
-asymmetric_keys-y := asymmetric_type.o signature.o
+asymmetric_keys-y := \
+	asymmetric_type.o \
+	restrict.o \
+	signature.o
 
 obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o
 
diff --git a/crypto/asymmetric_keys/asymmetric_keys.h b/crypto/asymmetric_keys/asymmetric_keys.h
index 1d450b5..ca8e9ac 100644
--- a/crypto/asymmetric_keys/asymmetric_keys.h
+++ b/crypto/asymmetric_keys/asymmetric_keys.h
@@ -9,6 +9,8 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
+#include <keys/asymmetric-type.h>
+
 extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id);
 
 extern int __asymmetric_key_hex_to_key_id(const char *id,
diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index 9f2165b..6600181 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -35,6 +35,95 @@
 static DECLARE_RWSEM(asymmetric_key_parsers_sem);
 
 /**
+ * find_asymmetric_key - Find a key by ID.
+ * @keyring: The keys to search.
+ * @id_0: The first ID to look for or NULL.
+ * @id_1: The second ID to look for or NULL.
+ * @partial: Use partial match if true, exact if false.
+ *
+ * Find a key in the given keyring by identifier.  The preferred identifier is
+ * the id_0 and the fallback identifier is the id_1.  If both are given, the
+ * lookup is by the former, but the latter must also match.
+ */
+struct key *find_asymmetric_key(struct key *keyring,
+				const struct asymmetric_key_id *id_0,
+				const struct asymmetric_key_id *id_1,
+				bool partial)
+{
+	struct key *key;
+	key_ref_t ref;
+	const char *lookup;
+	char *req, *p;
+	int len;
+
+	if (id_0) {
+		lookup = id_0->data;
+		len = id_0->len;
+	} else {
+		lookup = id_1->data;
+		len = id_1->len;
+	}
+
+	/* Construct an identifier "id:<keyid>" or "ex:<keyid>". */
+	p = req = kmalloc(2 + 1 + len * 2 + 1, GFP_KERNEL);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	if (partial) {
+		*p++ = 'i';
+		*p++ = 'd';
+	} else {
+		*p++ = 'e';
+		*p++ = 'x';
+	}
+	*p++ = ':';
+	p = bin2hex(p, lookup, len);
+	*p = 0;
+
+	pr_debug("Look up: \"%s\"\n", req);
+
+	ref = keyring_search(make_key_ref(keyring, 1),
+			     &key_type_asymmetric, req);
+	if (IS_ERR(ref))
+		pr_debug("Request for key '%s' err %ld\n", req, PTR_ERR(ref));
+	kfree(req);
+
+	if (IS_ERR(ref)) {
+		switch (PTR_ERR(ref)) {
+			/* Hide some search errors */
+		case -EACCES:
+		case -ENOTDIR:
+		case -EAGAIN:
+			return ERR_PTR(-ENOKEY);
+		default:
+			return ERR_CAST(ref);
+		}
+	}
+
+	key = key_ref_to_ptr(ref);
+	if (id_0 && id_1) {
+		const struct asymmetric_key_ids *kids = asymmetric_key_ids(key);
+
+		if (!kids->id[1]) {
+			pr_debug("First ID matches, but second is missing\n");
+			goto reject;
+		}
+		if (!asymmetric_key_id_same(id_1, kids->id[1])) {
+			pr_debug("First ID matches, but second does not\n");
+			goto reject;
+		}
+	}
+
+	pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key));
+	return key;
+
+reject:
+	key_put(key);
+	return ERR_PTR(-EKEYREJECTED);
+}
+EXPORT_SYMBOL_GPL(find_asymmetric_key);
+
+/**
  * asymmetric_key_generate_id: Construct an asymmetric key ID
  * @val_1: First binary blob
  * @len_1: Length of first binary blob
@@ -331,7 +420,8 @@
 	pr_devel("==>%s()\n", __func__);
 
 	if (subtype) {
-		subtype->destroy(prep->payload.data[asym_crypto]);
+		subtype->destroy(prep->payload.data[asym_crypto],
+				 prep->payload.data[asym_auth]);
 		module_put(subtype->owner);
 	}
 	asymmetric_key_free_kids(kids);
@@ -346,13 +436,15 @@
 	struct asymmetric_key_subtype *subtype = asymmetric_key_subtype(key);
 	struct asymmetric_key_ids *kids = key->payload.data[asym_key_ids];
 	void *data = key->payload.data[asym_crypto];
+	void *auth = key->payload.data[asym_auth];
 
 	key->payload.data[asym_crypto] = NULL;
 	key->payload.data[asym_subtype] = NULL;
 	key->payload.data[asym_key_ids] = NULL;
+	key->payload.data[asym_auth] = NULL;
 
 	if (subtype) {
-		subtype->destroy(data);
+		subtype->destroy(data, auth);
 		module_put(subtype->owner);
 	}
 
diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c
index 3242cbf..6a76d5c 100644
--- a/crypto/asymmetric_keys/mscode_parser.c
+++ b/crypto/asymmetric_keys/mscode_parser.c
@@ -21,19 +21,13 @@
 /*
  * Parse a Microsoft Individual Code Signing blob
  */
-int mscode_parse(struct pefile_context *ctx)
+int mscode_parse(void *_ctx, const void *content_data, size_t data_len,
+		 size_t asn1hdrlen)
 {
-	const void *content_data;
-	size_t data_len;
-	int ret;
+	struct pefile_context *ctx = _ctx;
 
-	ret = pkcs7_get_content_data(ctx->pkcs7, &content_data, &data_len, 1);
-
-	if (ret) {
-		pr_debug("PKCS#7 message does not contain data\n");
-		return ret;
-	}
-
+	content_data -= asn1hdrlen;
+	data_len += asn1hdrlen;
 	pr_devel("Data: %zu [%*ph]\n", data_len, (unsigned)(data_len),
 		 content_data);
 
@@ -129,7 +123,9 @@
 {
 	struct pefile_context *ctx = context;
 
-	ctx->digest = value;
+	ctx->digest = kmemdup(value, vlen, GFP_KERNEL);
+	if (!ctx->digest)
+		return -ENOMEM;
 	ctx->digest_len = vlen;
 	return 0;
 }
diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c
index e2d0edb..3b92523 100644
--- a/crypto/asymmetric_keys/pkcs7_key_type.c
+++ b/crypto/asymmetric_keys/pkcs7_key_type.c
@@ -13,12 +13,9 @@
 #include <linux/key.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/verification.h>
 #include <linux/key-type.h>
-#include <keys/asymmetric-type.h>
-#include <crypto/pkcs7.h>
 #include <keys/user-type.h>
-#include <keys/system_keyring.h>
-#include "pkcs7_parser.h"
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("PKCS#7 testing key type");
@@ -29,57 +26,44 @@
 		 "Usage to specify when verifying the PKCS#7 message");
 
 /*
+ * Retrieve the PKCS#7 message content.
+ */
+static int pkcs7_view_content(void *ctx, const void *data, size_t len,
+			      size_t asn1hdrlen)
+{
+	struct key_preparsed_payload *prep = ctx;
+	const void *saved_prep_data;
+	size_t saved_prep_datalen;
+	int ret;
+
+	saved_prep_data = prep->data;
+	saved_prep_datalen = prep->datalen;
+	prep->data = data;
+	prep->datalen = len;
+
+	ret = user_preparse(prep);
+
+	prep->data = saved_prep_data;
+	prep->datalen = saved_prep_datalen;
+	return ret;
+}
+
+/*
  * Preparse a PKCS#7 wrapped and validated data blob.
  */
 static int pkcs7_preparse(struct key_preparsed_payload *prep)
 {
 	enum key_being_used_for usage = pkcs7_usage;
-	struct pkcs7_message *pkcs7;
-	const void *data, *saved_prep_data;
-	size_t datalen, saved_prep_datalen;
-	bool trusted;
-	int ret;
-
-	kenter("");
 
 	if (usage >= NR__KEY_BEING_USED_FOR) {
 		pr_err("Invalid usage type %d\n", usage);
 		return -EINVAL;
 	}
 
-	saved_prep_data = prep->data;
-	saved_prep_datalen = prep->datalen;
-	pkcs7 = pkcs7_parse_message(saved_prep_data, saved_prep_datalen);
-	if (IS_ERR(pkcs7)) {
-		ret = PTR_ERR(pkcs7);
-		goto error;
-	}
-
-	ret = pkcs7_verify(pkcs7, usage);
-	if (ret < 0)
-		goto error_free;
-
-	ret = pkcs7_validate_trust(pkcs7, system_trusted_keyring, &trusted);
-	if (ret < 0)
-		goto error_free;
-	if (!trusted)
-		pr_warn("PKCS#7 message doesn't chain back to a trusted key\n");
-
-	ret = pkcs7_get_content_data(pkcs7, &data, &datalen, false);
-	if (ret < 0)
-		goto error_free;
-
-	prep->data = data;
-	prep->datalen = datalen;
-	ret = user_preparse(prep);
-	prep->data = saved_prep_data;
-	prep->datalen = saved_prep_datalen;
-
-error_free:
-	pkcs7_free_message(pkcs7);
-error:
-	kleave(" = %d", ret);
-	return ret;
+	return verify_pkcs7_signature(NULL, 0,
+				      prep->data, prep->datalen,
+				      NULL, usage,
+				      pkcs7_view_content, prep);
 }
 
 /*
diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index bdd0d753..af4cd86 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -44,9 +44,7 @@
 static void pkcs7_free_signed_info(struct pkcs7_signed_info *sinfo)
 {
 	if (sinfo) {
-		kfree(sinfo->sig.s);
-		kfree(sinfo->sig.digest);
-		kfree(sinfo->signing_cert_id);
+		public_key_signature_free(sinfo->sig);
 		kfree(sinfo);
 	}
 }
@@ -125,6 +123,10 @@
 	ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL);
 	if (!ctx->sinfo)
 		goto out_no_sinfo;
+	ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature),
+				  GFP_KERNEL);
+	if (!ctx->sinfo->sig)
+		goto out_no_sig;
 
 	ctx->data = (unsigned long)data;
 	ctx->ppcerts = &ctx->certs;
@@ -150,6 +152,7 @@
 		ctx->certs = cert->next;
 		x509_free_certificate(cert);
 	}
+out_no_sig:
 	pkcs7_free_signed_info(ctx->sinfo);
 out_no_sinfo:
 	pkcs7_free_message(ctx->msg);
@@ -165,24 +168,25 @@
  * @pkcs7: The preparsed PKCS#7 message to access
  * @_data: Place to return a pointer to the data
  * @_data_len: Place to return the data length
- * @want_wrapper: True if the ASN.1 object header should be included in the data
+ * @_headerlen: Size of ASN.1 header not included in _data
  *
- * Get access to the data content of the PKCS#7 message, including, optionally,
- * the header of the ASN.1 object that contains it.  Returns -ENODATA if the
- * data object was missing from the message.
+ * Get access to the data content of the PKCS#7 message.  The size of the
+ * header of the ASN.1 object that contains it is also provided and can be used
+ * to adjust *_data and *_data_len to get the entire object.
+ *
+ * Returns -ENODATA if the data object was missing from the message.
  */
 int pkcs7_get_content_data(const struct pkcs7_message *pkcs7,
 			   const void **_data, size_t *_data_len,
-			   bool want_wrapper)
+			   size_t *_headerlen)
 {
-	size_t wrapper;
-
 	if (!pkcs7->data)
 		return -ENODATA;
 
-	wrapper = want_wrapper ? pkcs7->data_hdrlen : 0;
-	*_data = pkcs7->data - wrapper;
-	*_data_len = pkcs7->data_len + wrapper;
+	*_data = pkcs7->data;
+	*_data_len = pkcs7->data_len;
+	if (_headerlen)
+		*_headerlen = pkcs7->data_hdrlen;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pkcs7_get_content_data);
@@ -218,25 +222,25 @@
 
 	switch (ctx->last_oid) {
 	case OID_md4:
-		ctx->sinfo->sig.hash_algo = "md4";
+		ctx->sinfo->sig->hash_algo = "md4";
 		break;
 	case OID_md5:
-		ctx->sinfo->sig.hash_algo = "md5";
+		ctx->sinfo->sig->hash_algo = "md5";
 		break;
 	case OID_sha1:
-		ctx->sinfo->sig.hash_algo = "sha1";
+		ctx->sinfo->sig->hash_algo = "sha1";
 		break;
 	case OID_sha256:
-		ctx->sinfo->sig.hash_algo = "sha256";
+		ctx->sinfo->sig->hash_algo = "sha256";
 		break;
 	case OID_sha384:
-		ctx->sinfo->sig.hash_algo = "sha384";
+		ctx->sinfo->sig->hash_algo = "sha384";
 		break;
 	case OID_sha512:
-		ctx->sinfo->sig.hash_algo = "sha512";
+		ctx->sinfo->sig->hash_algo = "sha512";
 		break;
 	case OID_sha224:
-		ctx->sinfo->sig.hash_algo = "sha224";
+		ctx->sinfo->sig->hash_algo = "sha224";
 		break;
 	default:
 		printk("Unsupported digest algo: %u\n", ctx->last_oid);
@@ -256,7 +260,7 @@
 
 	switch (ctx->last_oid) {
 	case OID_rsaEncryption:
-		ctx->sinfo->sig.pkey_algo = "rsa";
+		ctx->sinfo->sig->pkey_algo = "rsa";
 		break;
 	default:
 		printk("Unsupported pkey algo: %u\n", ctx->last_oid);
@@ -616,11 +620,11 @@
 {
 	struct pkcs7_parse_context *ctx = context;
 
-	ctx->sinfo->sig.s = kmemdup(value, vlen, GFP_KERNEL);
-	if (!ctx->sinfo->sig.s)
+	ctx->sinfo->sig->s = kmemdup(value, vlen, GFP_KERNEL);
+	if (!ctx->sinfo->sig->s)
 		return -ENOMEM;
 
-	ctx->sinfo->sig.s_size = vlen;
+	ctx->sinfo->sig->s_size = vlen;
 	return 0;
 }
 
@@ -656,12 +660,16 @@
 
 	pr_devel("SINFO KID: %u [%*phN]\n", kid->len, kid->len, kid->data);
 
-	sinfo->signing_cert_id = kid;
+	sinfo->sig->auth_ids[0] = kid;
 	sinfo->index = ++ctx->sinfo_index;
 	*ctx->ppsinfo = sinfo;
 	ctx->ppsinfo = &sinfo->next;
 	ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL);
 	if (!ctx->sinfo)
 		return -ENOMEM;
+	ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature),
+				  GFP_KERNEL);
+	if (!ctx->sinfo->sig)
+		return -ENOMEM;
 	return 0;
 }
diff --git a/crypto/asymmetric_keys/pkcs7_parser.h b/crypto/asymmetric_keys/pkcs7_parser.h
index a66b19e..f4e8107 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.h
+++ b/crypto/asymmetric_keys/pkcs7_parser.h
@@ -22,7 +22,6 @@
 	struct pkcs7_signed_info *next;
 	struct x509_certificate *signer; /* Signing certificate (in msg->certs) */
 	unsigned	index;
-	bool		trusted;
 	bool		unsupported_crypto;	/* T if not usable due to missing crypto */
 
 	/* Message digest - the digest of the Content Data (or NULL) */
@@ -41,19 +40,17 @@
 #define	sinfo_has_ms_statement_type	5
 	time64_t	signing_time;
 
-	/* Issuing cert serial number and issuer's name [PKCS#7 or CMS ver 1]
-	 * or issuing cert's SKID [CMS ver 3].
-	 */
-	struct asymmetric_key_id *signing_cert_id;
-
 	/* Message signature.
 	 *
 	 * This contains the generated digest of _either_ the Content Data or
 	 * the Authenticated Attributes [RFC2315 9.3].  If the latter, one of
 	 * the attributes contains the digest of the the Content Data within
 	 * it.
+	 *
+	 * This also contains the issuing cert serial number and issuer's name
+	 * [PKCS#7 or CMS ver 1] or issuing cert's SKID [CMS ver 3].
 	 */
-	struct public_key_signature sig;
+	struct public_key_signature *sig;
 };
 
 struct pkcs7_message {
diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c
index 7d7a39b4..f6a009d 100644
--- a/crypto/asymmetric_keys/pkcs7_trust.c
+++ b/crypto/asymmetric_keys/pkcs7_trust.c
@@ -27,10 +27,9 @@
 				    struct pkcs7_signed_info *sinfo,
 				    struct key *trust_keyring)
 {
-	struct public_key_signature *sig = &sinfo->sig;
+	struct public_key_signature *sig = sinfo->sig;
 	struct x509_certificate *x509, *last = NULL, *p;
 	struct key *key;
-	bool trusted;
 	int ret;
 
 	kenter(",%u,", sinfo->index);
@@ -42,10 +41,8 @@
 
 	for (x509 = sinfo->signer; x509; x509 = x509->signer) {
 		if (x509->seen) {
-			if (x509->verified) {
-				trusted = x509->trusted;
+			if (x509->verified)
 				goto verified;
-			}
 			kleave(" = -ENOKEY [cached]");
 			return -ENOKEY;
 		}
@@ -54,9 +51,8 @@
 		/* Look to see if this certificate is present in the trusted
 		 * keys.
 		 */
-		key = x509_request_asymmetric_key(trust_keyring,
-						  x509->id, x509->skid,
-						  false);
+		key = find_asymmetric_key(trust_keyring,
+					  x509->id, x509->skid, false);
 		if (!IS_ERR(key)) {
 			/* One of the X.509 certificates in the PKCS#7 message
 			 * is apparently the same as one we already trust.
@@ -80,17 +76,17 @@
 
 		might_sleep();
 		last = x509;
-		sig = &last->sig;
+		sig = last->sig;
 	}
 
 	/* No match - see if the root certificate has a signer amongst the
 	 * trusted keys.
 	 */
-	if (last && (last->akid_id || last->akid_skid)) {
-		key = x509_request_asymmetric_key(trust_keyring,
-						  last->akid_id,
-						  last->akid_skid,
-						  false);
+	if (last && (last->sig->auth_ids[0] || last->sig->auth_ids[1])) {
+		key = find_asymmetric_key(trust_keyring,
+					  last->sig->auth_ids[0],
+					  last->sig->auth_ids[1],
+					  false);
 		if (!IS_ERR(key)) {
 			x509 = last;
 			pr_devel("sinfo %u: Root cert %u signer is key %x\n",
@@ -104,10 +100,8 @@
 	/* As a last resort, see if we have a trusted public key that matches
 	 * the signed info directly.
 	 */
-	key = x509_request_asymmetric_key(trust_keyring,
-					  sinfo->signing_cert_id,
-					  NULL,
-					  false);
+	key = find_asymmetric_key(trust_keyring,
+				  sinfo->sig->auth_ids[0], NULL, false);
 	if (!IS_ERR(key)) {
 		pr_devel("sinfo %u: Direct signer is key %x\n",
 			 sinfo->index, key_serial(key));
@@ -122,7 +116,6 @@
 
 matched:
 	ret = verify_signature(key, sig);
-	trusted = test_bit(KEY_FLAG_TRUSTED, &key->flags);
 	key_put(key);
 	if (ret < 0) {
 		if (ret == -ENOMEM)
@@ -134,12 +127,9 @@
 verified:
 	if (x509) {
 		x509->verified = true;
-		for (p = sinfo->signer; p != x509; p = p->signer) {
+		for (p = sinfo->signer; p != x509; p = p->signer)
 			p->verified = true;
-			p->trusted = trusted;
-		}
 	}
-	sinfo->trusted = trusted;
 	kleave(" = 0");
 	return 0;
 }
@@ -148,7 +138,6 @@
  * pkcs7_validate_trust - Validate PKCS#7 trust chain
  * @pkcs7: The PKCS#7 certificate to validate
  * @trust_keyring: Signing certificates to use as starting points
- * @_trusted: Set to true if trustworth, false otherwise
  *
  * Validate that the certificate chain inside the PKCS#7 message intersects
  * keys we already know and trust.
@@ -170,16 +159,13 @@
  * May also return -ENOMEM.
  */
 int pkcs7_validate_trust(struct pkcs7_message *pkcs7,
-			 struct key *trust_keyring,
-			 bool *_trusted)
+			 struct key *trust_keyring)
 {
 	struct pkcs7_signed_info *sinfo;
 	struct x509_certificate *p;
 	int cached_ret = -ENOKEY;
 	int ret;
 
-	*_trusted = false;
-
 	for (p = pkcs7->certs; p; p = p->next)
 		p->seen = false;
 
@@ -193,7 +179,6 @@
 				cached_ret = -ENOPKG;
 			continue;
 		case 0:
-			*_trusted |= sinfo->trusted;
 			cached_ret = 0;
 			continue;
 		default:
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 50be2a1..44b746e 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -25,34 +25,36 @@
 static int pkcs7_digest(struct pkcs7_message *pkcs7,
 			struct pkcs7_signed_info *sinfo)
 {
+	struct public_key_signature *sig = sinfo->sig;
 	struct crypto_shash *tfm;
 	struct shash_desc *desc;
-	size_t digest_size, desc_size;
-	void *digest;
+	size_t desc_size;
 	int ret;
 
-	kenter(",%u,%s", sinfo->index, sinfo->sig.hash_algo);
+	kenter(",%u,%s", sinfo->index, sinfo->sig->hash_algo);
 
-	if (!sinfo->sig.hash_algo)
+	if (!sinfo->sig->hash_algo)
 		return -ENOPKG;
 
 	/* Allocate the hashing algorithm we're going to need and find out how
 	 * big the hash operational data will be.
 	 */
-	tfm = crypto_alloc_shash(sinfo->sig.hash_algo, 0, 0);
+	tfm = crypto_alloc_shash(sinfo->sig->hash_algo, 0, 0);
 	if (IS_ERR(tfm))
 		return (PTR_ERR(tfm) == -ENOENT) ? -ENOPKG : PTR_ERR(tfm);
 
 	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
-	sinfo->sig.digest_size = digest_size = crypto_shash_digestsize(tfm);
+	sig->digest_size = crypto_shash_digestsize(tfm);
 
 	ret = -ENOMEM;
-	digest = kzalloc(ALIGN(digest_size, __alignof__(*desc)) + desc_size,
-			 GFP_KERNEL);
-	if (!digest)
+	sig->digest = kmalloc(sig->digest_size, GFP_KERNEL);
+	if (!sig->digest)
 		goto error_no_desc;
 
-	desc = PTR_ALIGN(digest + digest_size, __alignof__(*desc));
+	desc = kzalloc(desc_size, GFP_KERNEL);
+	if (!desc)
+		goto error_no_desc;
+
 	desc->tfm   = tfm;
 	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
@@ -60,10 +62,11 @@
 	ret = crypto_shash_init(desc);
 	if (ret < 0)
 		goto error;
-	ret = crypto_shash_finup(desc, pkcs7->data, pkcs7->data_len, digest);
+	ret = crypto_shash_finup(desc, pkcs7->data, pkcs7->data_len,
+				 sig->digest);
 	if (ret < 0)
 		goto error;
-	pr_devel("MsgDigest = [%*ph]\n", 8, digest);
+	pr_devel("MsgDigest = [%*ph]\n", 8, sig->digest);
 
 	/* However, if there are authenticated attributes, there must be a
 	 * message digest attribute amongst them which corresponds to the
@@ -78,14 +81,15 @@
 			goto error;
 		}
 
-		if (sinfo->msgdigest_len != sinfo->sig.digest_size) {
+		if (sinfo->msgdigest_len != sig->digest_size) {
 			pr_debug("Sig %u: Invalid digest size (%u)\n",
 				 sinfo->index, sinfo->msgdigest_len);
 			ret = -EBADMSG;
 			goto error;
 		}
 
-		if (memcmp(digest, sinfo->msgdigest, sinfo->msgdigest_len) != 0) {
+		if (memcmp(sig->digest, sinfo->msgdigest,
+			   sinfo->msgdigest_len) != 0) {
 			pr_debug("Sig %u: Message digest doesn't match\n",
 				 sinfo->index);
 			ret = -EKEYREJECTED;
@@ -97,7 +101,7 @@
 		 * convert the attributes from a CONT.0 into a SET before we
 		 * hash it.
 		 */
-		memset(digest, 0, sinfo->sig.digest_size);
+		memset(sig->digest, 0, sig->digest_size);
 
 		ret = crypto_shash_init(desc);
 		if (ret < 0)
@@ -107,17 +111,14 @@
 		if (ret < 0)
 			goto error;
 		ret = crypto_shash_finup(desc, sinfo->authattrs,
-					 sinfo->authattrs_len, digest);
+					 sinfo->authattrs_len, sig->digest);
 		if (ret < 0)
 			goto error;
-		pr_devel("AADigest = [%*ph]\n", 8, digest);
+		pr_devel("AADigest = [%*ph]\n", 8, sig->digest);
 	}
 
-	sinfo->sig.digest = digest;
-	digest = NULL;
-
 error:
-	kfree(digest);
+	kfree(desc);
 error_no_desc:
 	crypto_free_shash(tfm);
 	kleave(" = %d", ret);
@@ -144,12 +145,12 @@
 		 * PKCS#7 message - but I can't be 100% sure of that.  It's
 		 * possible this will need element-by-element comparison.
 		 */
-		if (!asymmetric_key_id_same(x509->id, sinfo->signing_cert_id))
+		if (!asymmetric_key_id_same(x509->id, sinfo->sig->auth_ids[0]))
 			continue;
 		pr_devel("Sig %u: Found cert serial match X.509[%u]\n",
 			 sinfo->index, certix);
 
-		if (x509->pub->pkey_algo != sinfo->sig.pkey_algo) {
+		if (x509->pub->pkey_algo != sinfo->sig->pkey_algo) {
 			pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n",
 				sinfo->index);
 			continue;
@@ -164,7 +165,7 @@
 	 */
 	pr_debug("Sig %u: Issuing X.509 cert not found (#%*phN)\n",
 		 sinfo->index,
-		 sinfo->signing_cert_id->len, sinfo->signing_cert_id->data);
+		 sinfo->sig->auth_ids[0]->len, sinfo->sig->auth_ids[0]->data);
 	return 0;
 }
 
@@ -174,6 +175,7 @@
 static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
 				  struct pkcs7_signed_info *sinfo)
 {
+	struct public_key_signature *sig;
 	struct x509_certificate *x509 = sinfo->signer, *p;
 	struct asymmetric_key_id *auth;
 	int ret;
@@ -188,34 +190,26 @@
 			 x509->subject,
 			 x509->raw_serial_size, x509->raw_serial);
 		x509->seen = true;
-		ret = x509_get_sig_params(x509);
-		if (ret < 0)
-			goto maybe_missing_crypto_in_x509;
+		if (x509->unsupported_key)
+			goto unsupported_crypto_in_x509;
 
 		pr_debug("- issuer %s\n", x509->issuer);
-		if (x509->akid_id)
+		sig = x509->sig;
+		if (sig->auth_ids[0])
 			pr_debug("- authkeyid.id %*phN\n",
-				 x509->akid_id->len, x509->akid_id->data);
-		if (x509->akid_skid)
+				 sig->auth_ids[0]->len, sig->auth_ids[0]->data);
+		if (sig->auth_ids[1])
 			pr_debug("- authkeyid.skid %*phN\n",
-				 x509->akid_skid->len, x509->akid_skid->data);
+				 sig->auth_ids[1]->len, sig->auth_ids[1]->data);
 
-		if ((!x509->akid_id && !x509->akid_skid) ||
-		    strcmp(x509->subject, x509->issuer) == 0) {
+		if (x509->self_signed) {
 			/* If there's no authority certificate specified, then
 			 * the certificate must be self-signed and is the root
 			 * of the chain.  Likewise if the cert is its own
 			 * authority.
 			 */
-			pr_debug("- no auth?\n");
-			if (x509->raw_subject_size != x509->raw_issuer_size ||
-			    memcmp(x509->raw_subject, x509->raw_issuer,
-				   x509->raw_issuer_size) != 0)
-				return 0;
-
-			ret = x509_check_signature(x509->pub, x509);
-			if (ret < 0)
-				goto maybe_missing_crypto_in_x509;
+			if (x509->unsupported_sig)
+				goto unsupported_crypto_in_x509;
 			x509->signer = x509;
 			pr_debug("- self-signed\n");
 			return 0;
@@ -224,7 +218,7 @@
 		/* Look through the X.509 certificates in the PKCS#7 message's
 		 * list to see if the next one is there.
 		 */
-		auth = x509->akid_id;
+		auth = sig->auth_ids[0];
 		if (auth) {
 			pr_debug("- want %*phN\n", auth->len, auth->data);
 			for (p = pkcs7->certs; p; p = p->next) {
@@ -234,7 +228,7 @@
 					goto found_issuer_check_skid;
 			}
 		} else {
-			auth = x509->akid_skid;
+			auth = sig->auth_ids[1];
 			pr_debug("- want %*phN\n", auth->len, auth->data);
 			for (p = pkcs7->certs; p; p = p->next) {
 				if (!p->skid)
@@ -254,8 +248,8 @@
 		/* We matched issuer + serialNumber, but if there's an
 		 * authKeyId.keyId, that must match the CA subjKeyId also.
 		 */
-		if (x509->akid_skid &&
-		    !asymmetric_key_id_same(p->skid, x509->akid_skid)) {
+		if (sig->auth_ids[1] &&
+		    !asymmetric_key_id_same(p->skid, sig->auth_ids[1])) {
 			pr_warn("Sig %u: X.509 chain contains auth-skid nonmatch (%u->%u)\n",
 				sinfo->index, x509->index, p->index);
 			return -EKEYREJECTED;
@@ -267,7 +261,7 @@
 				sinfo->index);
 			return 0;
 		}
-		ret = x509_check_signature(p->pub, x509);
+		ret = public_key_verify_signature(p->pub, x509->sig);
 		if (ret < 0)
 			return ret;
 		x509->signer = p;
@@ -279,16 +273,14 @@
 		might_sleep();
 	}
 
-maybe_missing_crypto_in_x509:
+unsupported_crypto_in_x509:
 	/* Just prune the certificate chain at this point if we lack some
 	 * crypto module to go further.  Note, however, we don't want to set
-	 * sinfo->missing_crypto as the signed info block may still be
+	 * sinfo->unsupported_crypto as the signed info block may still be
 	 * validatable against an X.509 cert lower in the chain that we have a
 	 * trusted copy of.
 	 */
-	if (ret == -ENOPKG)
-		return 0;
-	return ret;
+	return 0;
 }
 
 /*
@@ -332,7 +324,7 @@
 	}
 
 	/* Verify the PKCS#7 binary against the key */
-	ret = public_key_verify_signature(sinfo->signer->pub, &sinfo->sig);
+	ret = public_key_verify_signature(sinfo->signer->pub, sinfo->sig);
 	if (ret < 0)
 		return ret;
 
@@ -375,9 +367,8 @@
 		 enum key_being_used_for usage)
 {
 	struct pkcs7_signed_info *sinfo;
-	struct x509_certificate *x509;
 	int enopkg = -ENOPKG;
-	int ret, n;
+	int ret;
 
 	kenter("");
 
@@ -419,12 +410,6 @@
 		return -EINVAL;
 	}
 
-	for (n = 0, x509 = pkcs7->certs; x509; x509 = x509->next, n++) {
-		ret = x509_get_sig_params(x509);
-		if (ret < 0)
-			return ret;
-	}
-
 	for (sinfo = pkcs7->signed_infos; sinfo; sinfo = sinfo->next) {
 		ret = pkcs7_verify_one(pkcs7, sinfo);
 		if (ret < 0) {
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 0f8b264..fd76b5f 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -39,15 +39,23 @@
 /*
  * Destroy a public key algorithm key.
  */
-void public_key_destroy(void *payload)
+void public_key_free(struct public_key *key)
 {
-	struct public_key *key = payload;
-
-	if (key)
+	if (key) {
 		kfree(key->key);
-	kfree(key);
+		kfree(key);
+	}
 }
-EXPORT_SYMBOL_GPL(public_key_destroy);
+EXPORT_SYMBOL_GPL(public_key_free);
+
+/*
+ * Destroy a public key algorithm key.
+ */
+static void public_key_destroy(void *payload0, void *payload3)
+{
+	public_key_free(payload0);
+	public_key_signature_free(payload3);
+}
 
 struct public_key_completion {
 	struct completion completion;
diff --git a/crypto/asymmetric_keys/restrict.c b/crypto/asymmetric_keys/restrict.c
new file mode 100644
index 0000000..ac4bddf
--- /dev/null
+++ b/crypto/asymmetric_keys/restrict.c
@@ -0,0 +1,117 @@
+/* Restrict additions to a keyring of public keys by signature
+ *
+ * Copyright (C) 2012, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "ASYM: "fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <crypto/public_key.h>
+#include "asymmetric_keys.h"
+
+static bool use_builtin_keys;
+static struct asymmetric_key_id *ca_keyid;
+
+#ifndef MODULE
+static struct {
+	struct asymmetric_key_id id;
+	unsigned char data[10];
+} cakey;
+
+/*
+ * Parse the "ca_keys=" boot parameter.  "id:<hex>" records the key ID that
+ * sig->auth_ids[1] of a new key must match; "builtin" restricts vouching to
+ * keys flagged KEY_FLAG_BUILTIN.
+ */
+static int __init ca_keys_setup(char *str)
+{
+	if (!str)		/* default system keyring */
+		return 1;
+
+	if (strncmp(str, "id:", 3) == 0) {
+		struct asymmetric_key_id *p = &cakey.id;
+		size_t hexlen = (strlen(str) - 3) / 2;
+		int ret;
+
+		if (hexlen == 0 || hexlen > sizeof(cakey.data)) {
+			pr_err("Missing or invalid ca_keys id\n");
+			return 1;
+		}
+
+		ret = __asymmetric_key_hex_to_key_id(str + 3, p, hexlen);
+		if (ret < 0)
+			pr_err("Unparsable ca_keys id hex string\n");
+		else
+			ca_keyid = p;	/* owner key 'id:xxxxxx' */
+	} else if (strcmp(str, "builtin") == 0) {
+		use_builtin_keys = true;
+	}
+
+	return 1;
+}
+__setup("ca_keys=", ca_keys_setup);
+#endif
+
+/**
+ * restrict_link_by_signature - Restrict additions to a ring of public keys
+ * @trust_keyring: A ring of keys that can be used to vouch for the new cert.
+ * @type: The type of key being added.
+ * @payload: The payload of the new key.
+ *
+ * Check the new certificate against the ones in the trust keyring.  If one of
+ * those is the signing key and validates the new certificate, then the new
+ * certificate is accepted.
+ *
+ * Returns 0 if the new certificate was accepted, -ENOKEY if we couldn't find a
+ * matching parent certificate in the trusted list, -EKEYREJECTED if the
+ * signature check fails or the key is blacklisted, -ENOPKG if the new key
+ * carries no signature data to check and some other error if there is a
+ * matching certificate but the signature check cannot be performed.
+ */
+int restrict_link_by_signature(struct key *trust_keyring,
+			       const struct key_type *type,
+			       const union key_payload *payload)
+{
+	const struct public_key_signature *sig;
+	struct key *key;
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	if (!trust_keyring)
+		return -ENOKEY;
+
+	if (type != &key_type_asymmetric)
+		return -EOPNOTSUPP;
+
+	sig = payload->data[asym_auth];
+	/* NULL when the cert's signature was unsupported; nothing to check */
+	if (!sig)
+		return -ENOPKG;
+	if (!sig->auth_ids[0] && !sig->auth_ids[1])
+		return 0;
+
+	if (ca_keyid && !asymmetric_key_id_partial(sig->auth_ids[1], ca_keyid))
+		return -EPERM;
+
+	/* See if we have a key that signed this one. */
+	key = find_asymmetric_key(trust_keyring,
+				  sig->auth_ids[0], sig->auth_ids[1],
+				  false);
+	if (IS_ERR(key))
+		return -ENOKEY;
+
+	if (use_builtin_keys && !test_bit(KEY_FLAG_BUILTIN, &key->flags))
+		ret = -ENOKEY;
+	else
+		ret = verify_signature(key, sig);
+	key_put(key);
+	return ret;
+}
diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c
index 004d5fc..11b7ba17 100644
--- a/crypto/asymmetric_keys/signature.c
+++ b/crypto/asymmetric_keys/signature.c
@@ -15,9 +15,27 @@
 #include <keys/asymmetric-subtype.h>
 #include <linux/export.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 #include <crypto/public_key.h>
 #include "asymmetric_keys.h"
 
+/*
+ * Destroy a public key signature.
+ */
+void public_key_signature_free(struct public_key_signature *sig)
+{
+	int i;
+
+	if (sig) {
+		for (i = 0; i < ARRAY_SIZE(sig->auth_ids); i++)
+			kfree(sig->auth_ids[i]);
+		kfree(sig->s);
+		kfree(sig->digest);
+		kfree(sig);
+	}
+}
+EXPORT_SYMBOL_GPL(public_key_signature_free);
+
 /**
  * verify_signature - Initiate the use of an asymmetric key to verify a signature
  * @key: The asymmetric key to verify against
diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c
index 7e8c233..672a94c 100644
--- a/crypto/asymmetric_keys/verify_pefile.c
+++ b/crypto/asymmetric_keys/verify_pefile.c
@@ -16,7 +16,7 @@
 #include <linux/err.h>
 #include <linux/pe.h>
 #include <linux/asn1.h>
-#include <crypto/pkcs7.h>
+#include <linux/verification.h>
 #include <crypto/hash.h>
 #include "verify_pefile.h"
 
@@ -392,9 +392,8 @@
  * verify_pefile_signature - Verify the signature on a PE binary image
  * @pebuf: Buffer containing the PE binary image
  * @pelen: Length of the binary image
- * @trust_keyring: Signing certificates to use as starting points
+ * @trusted_keys: Signing certificate(s) to use as starting points
  * @usage: The use to which the key is being put.
- * @_trusted: Set to true if trustworth, false otherwise
  *
  * Validate that the certificate chain inside the PKCS#7 message inside the PE
  * binary image intersects keys we already know and trust.
@@ -418,14 +417,10 @@
  * May also return -ENOMEM.
  */
 int verify_pefile_signature(const void *pebuf, unsigned pelen,
-			    struct key *trusted_keyring,
-			    enum key_being_used_for usage,
-			    bool *_trusted)
+			    struct key *trusted_keys,
+			    enum key_being_used_for usage)
 {
-	struct pkcs7_message *pkcs7;
 	struct pefile_context ctx;
-	const void *data;
-	size_t datalen;
 	int ret;
 
 	kenter("");
@@ -439,19 +434,10 @@
 	if (ret < 0)
 		return ret;
 
-	pkcs7 = pkcs7_parse_message(pebuf + ctx.sig_offset, ctx.sig_len);
-	if (IS_ERR(pkcs7))
-		return PTR_ERR(pkcs7);
-	ctx.pkcs7 = pkcs7;
-
-	ret = pkcs7_get_content_data(ctx.pkcs7, &data, &datalen, false);
-	if (ret < 0 || datalen == 0) {
-		pr_devel("PKCS#7 message does not contain data\n");
-		ret = -EBADMSG;
-		goto error;
-	}
-
-	ret = mscode_parse(&ctx);
+	ret = verify_pkcs7_signature(NULL, 0,
+				     pebuf + ctx.sig_offset, ctx.sig_len,
+				     trusted_keys, usage,
+				     mscode_parse, &ctx);
 	if (ret < 0)
 		goto error;
 
@@ -462,16 +448,8 @@
 	 * contents.
 	 */
 	ret = pefile_digest_pe(pebuf, pelen, &ctx);
-	if (ret < 0)
-		goto error;
-
-	ret = pkcs7_verify(pkcs7, usage);
-	if (ret < 0)
-		goto error;
-
-	ret = pkcs7_validate_trust(pkcs7, trusted_keyring, _trusted);
 
 error:
-	pkcs7_free_message(ctx.pkcs7);
+	kfree(ctx.digest);
 	return ret;
 }
diff --git a/crypto/asymmetric_keys/verify_pefile.h b/crypto/asymmetric_keys/verify_pefile.h
index a133eb8..cd4d209 100644
--- a/crypto/asymmetric_keys/verify_pefile.h
+++ b/crypto/asymmetric_keys/verify_pefile.h
@@ -9,7 +9,6 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-#include <linux/verify_pefile.h>
 #include <crypto/pkcs7.h>
 #include <crypto/hash_info.h>
 
@@ -23,7 +22,6 @@
 	unsigned	sig_offset;
 	unsigned	sig_len;
 	const struct section_header *secs;
-	struct pkcs7_message *pkcs7;
 
 	/* PKCS#7 MS Individual Code Signing content */
 	const void	*digest;		/* Digest */
@@ -39,4 +37,5 @@
 /*
  * mscode_parser.c
  */
-extern int mscode_parse(struct pefile_context *ctx);
+extern int mscode_parse(void *_ctx, const void *content_data, size_t data_len,
+			size_t asn1hdrlen);
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 4a29bac..865f46e 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -47,15 +47,12 @@
 void x509_free_certificate(struct x509_certificate *cert)
 {
 	if (cert) {
-		public_key_destroy(cert->pub);
+		public_key_free(cert->pub);
+		public_key_signature_free(cert->sig);
 		kfree(cert->issuer);
 		kfree(cert->subject);
 		kfree(cert->id);
 		kfree(cert->skid);
-		kfree(cert->akid_id);
-		kfree(cert->akid_skid);
-		kfree(cert->sig.digest);
-		kfree(cert->sig.s);
 		kfree(cert);
 	}
 }
@@ -78,6 +75,9 @@
 	cert->pub = kzalloc(sizeof(struct public_key), GFP_KERNEL);
 	if (!cert->pub)
 		goto error_no_ctx;
+	cert->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL);
+	if (!cert->sig)
+		goto error_no_ctx;
 	ctx = kzalloc(sizeof(struct x509_parse_context), GFP_KERNEL);
 	if (!ctx)
 		goto error_no_ctx;
@@ -108,6 +108,11 @@
 
 	cert->pub->keylen = ctx->key_size;
 
+	/* Grab the signature bits */
+	ret = x509_get_sig_params(cert);
+	if (ret < 0)
+		goto error_decode;
+
 	/* Generate cert issuer + serial number key ID */
 	kid = asymmetric_key_generate_id(cert->raw_serial,
 					 cert->raw_serial_size,
@@ -119,6 +124,11 @@
 	}
 	cert->id = kid;
 
+	/* Detect self-signed certificates */
+	ret = x509_check_for_self_signed(cert);
+	if (ret < 0)
+		goto error_decode;
+
 	kfree(ctx);
 	return cert;
 
@@ -188,33 +198,33 @@
 		return -ENOPKG; /* Unsupported combination */
 
 	case OID_md4WithRSAEncryption:
-		ctx->cert->sig.hash_algo = "md4";
-		ctx->cert->sig.pkey_algo = "rsa";
+		ctx->cert->sig->hash_algo = "md4";
+		ctx->cert->sig->pkey_algo = "rsa";
 		break;
 
 	case OID_sha1WithRSAEncryption:
-		ctx->cert->sig.hash_algo = "sha1";
-		ctx->cert->sig.pkey_algo = "rsa";
+		ctx->cert->sig->hash_algo = "sha1";
+		ctx->cert->sig->pkey_algo = "rsa";
 		break;
 
 	case OID_sha256WithRSAEncryption:
-		ctx->cert->sig.hash_algo = "sha256";
-		ctx->cert->sig.pkey_algo = "rsa";
+		ctx->cert->sig->hash_algo = "sha256";
+		ctx->cert->sig->pkey_algo = "rsa";
 		break;
 
 	case OID_sha384WithRSAEncryption:
-		ctx->cert->sig.hash_algo = "sha384";
-		ctx->cert->sig.pkey_algo = "rsa";
+		ctx->cert->sig->hash_algo = "sha384";
+		ctx->cert->sig->pkey_algo = "rsa";
 		break;
 
 	case OID_sha512WithRSAEncryption:
-		ctx->cert->sig.hash_algo = "sha512";
-		ctx->cert->sig.pkey_algo = "rsa";
+		ctx->cert->sig->hash_algo = "sha512";
+		ctx->cert->sig->pkey_algo = "rsa";
 		break;
 
 	case OID_sha224WithRSAEncryption:
-		ctx->cert->sig.hash_algo = "sha224";
-		ctx->cert->sig.pkey_algo = "rsa";
+		ctx->cert->sig->hash_algo = "sha224";
+		ctx->cert->sig->pkey_algo = "rsa";
 		break;
 	}
 
@@ -572,14 +582,14 @@
 
 	pr_debug("AKID: keyid: %*phN\n", (int)vlen, value);
 
-	if (ctx->cert->akid_skid)
+	if (ctx->cert->sig->auth_ids[1])
 		return 0;
 
 	kid = asymmetric_key_generate_id(value, vlen, "", 0);
 	if (IS_ERR(kid))
 		return PTR_ERR(kid);
 	pr_debug("authkeyid %*phN\n", kid->len, kid->data);
-	ctx->cert->akid_skid = kid;
+	ctx->cert->sig->auth_ids[1] = kid;
 	return 0;
 }
 
@@ -611,7 +621,7 @@
 
 	pr_debug("AKID: serial: %*phN\n", (int)vlen, value);
 
-	if (!ctx->akid_raw_issuer || ctx->cert->akid_id)
+	if (!ctx->akid_raw_issuer || ctx->cert->sig->auth_ids[0])
 		return 0;
 
 	kid = asymmetric_key_generate_id(value,
@@ -622,6 +632,6 @@
 		return PTR_ERR(kid);
 
 	pr_debug("authkeyid %*phN\n", kid->len, kid->data);
-	ctx->cert->akid_id = kid;
+	ctx->cert->sig->auth_ids[0] = kid;
 	return 0;
 }
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h
index dbeed60..05eef1c 100644
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -17,13 +17,11 @@
 	struct x509_certificate *next;
 	struct x509_certificate *signer;	/* Certificate that signed this one */
 	struct public_key *pub;			/* Public key details */
-	struct public_key_signature sig;	/* Signature parameters */
+	struct public_key_signature *sig;	/* Signature parameters */
 	char		*issuer;		/* Name of certificate issuer */
 	char		*subject;		/* Name of certificate subject */
 	struct asymmetric_key_id *id;		/* Issuer + Serial number */
 	struct asymmetric_key_id *skid;		/* Subject + subjectKeyId (optional) */
-	struct asymmetric_key_id *akid_id;	/* CA AuthKeyId matching ->id (optional) */
-	struct asymmetric_key_id *akid_skid;	/* CA AuthKeyId matching ->skid (optional) */
 	time64_t	valid_from;
 	time64_t	valid_to;
 	const void	*tbs;			/* Signed data */
@@ -41,8 +39,9 @@
 	unsigned	index;
 	bool		seen;			/* Infinite recursion prevention */
 	bool		verified;
-	bool		trusted;
-	bool		unsupported_crypto;	/* T if can't be verified due to missing crypto */
+	bool		self_signed;		/* T if self-signed (check unsupported_sig too) */
+	bool		unsupported_key;	/* T if key uses unsupported crypto */
+	bool		unsupported_sig;	/* T if signature uses unsupported crypto */
 };
 
 /*
@@ -58,5 +57,4 @@
  * x509_public_key.c
  */
 extern int x509_get_sig_params(struct x509_certificate *cert);
-extern int x509_check_signature(const struct public_key *pub,
-				struct x509_certificate *cert);
+extern int x509_check_for_self_signed(struct x509_certificate *cert);
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index 733c046..fb73229 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -20,256 +20,133 @@
 #include "asymmetric_keys.h"
 #include "x509_parser.h"
 
-static bool use_builtin_keys;
-static struct asymmetric_key_id *ca_keyid;
-
-#ifndef MODULE
-static struct {
-	struct asymmetric_key_id id;
-	unsigned char data[10];
-} cakey;
-
-static int __init ca_keys_setup(char *str)
-{
-	if (!str)		/* default system keyring */
-		return 1;
-
-	if (strncmp(str, "id:", 3) == 0) {
-		struct asymmetric_key_id *p = &cakey.id;
-		size_t hexlen = (strlen(str) - 3) / 2;
-		int ret;
-
-		if (hexlen == 0 || hexlen > sizeof(cakey.data)) {
-			pr_err("Missing or invalid ca_keys id\n");
-			return 1;
-		}
-
-		ret = __asymmetric_key_hex_to_key_id(str + 3, p, hexlen);
-		if (ret < 0)
-			pr_err("Unparsable ca_keys id hex string\n");
-		else
-			ca_keyid = p;	/* owner key 'id:xxxxxx' */
-	} else if (strcmp(str, "builtin") == 0) {
-		use_builtin_keys = true;
-	}
-
-	return 1;
-}
-__setup("ca_keys=", ca_keys_setup);
-#endif
-
-/**
- * x509_request_asymmetric_key - Request a key by X.509 certificate params.
- * @keyring: The keys to search.
- * @id: The issuer & serialNumber to look for or NULL.
- * @skid: The subjectKeyIdentifier to look for or NULL.
- * @partial: Use partial match if true, exact if false.
- *
- * Find a key in the given keyring by identifier.  The preferred identifier is
- * the issuer + serialNumber and the fallback identifier is the
- * subjectKeyIdentifier.  If both are given, the lookup is by the former, but
- * the latter must also match.
- */
-struct key *x509_request_asymmetric_key(struct key *keyring,
-					const struct asymmetric_key_id *id,
-					const struct asymmetric_key_id *skid,
-					bool partial)
-{
-	struct key *key;
-	key_ref_t ref;
-	const char *lookup;
-	char *req, *p;
-	int len;
-
-	if (id) {
-		lookup = id->data;
-		len = id->len;
-	} else {
-		lookup = skid->data;
-		len = skid->len;
-	}
-	
-	/* Construct an identifier "id:<keyid>". */
-	p = req = kmalloc(2 + 1 + len * 2 + 1, GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	if (partial) {
-		*p++ = 'i';
-		*p++ = 'd';
-	} else {
-		*p++ = 'e';
-		*p++ = 'x';
-	}
-	*p++ = ':';
-	p = bin2hex(p, lookup, len);
-	*p = 0;
-
-	pr_debug("Look up: \"%s\"\n", req);
-
-	ref = keyring_search(make_key_ref(keyring, 1),
-			     &key_type_asymmetric, req);
-	if (IS_ERR(ref))
-		pr_debug("Request for key '%s' err %ld\n", req, PTR_ERR(ref));
-	kfree(req);
-
-	if (IS_ERR(ref)) {
-		switch (PTR_ERR(ref)) {
-			/* Hide some search errors */
-		case -EACCES:
-		case -ENOTDIR:
-		case -EAGAIN:
-			return ERR_PTR(-ENOKEY);
-		default:
-			return ERR_CAST(ref);
-		}
-	}
-
-	key = key_ref_to_ptr(ref);
-	if (id && skid) {
-		const struct asymmetric_key_ids *kids = asymmetric_key_ids(key);
-		if (!kids->id[1]) {
-			pr_debug("issuer+serial match, but expected SKID missing\n");
-			goto reject;
-		}
-		if (!asymmetric_key_id_same(skid, kids->id[1])) {
-			pr_debug("issuer+serial match, but SKID does not\n");
-			goto reject;
-		}
-	}
-	
-	pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key));
-	return key;
-
-reject:
-	key_put(key);
-	return ERR_PTR(-EKEYREJECTED);
-}
-EXPORT_SYMBOL_GPL(x509_request_asymmetric_key);
-
 /*
  * Set up the signature parameters in an X.509 certificate.  This involves
  * digesting the signed data and extracting the signature.
  */
 int x509_get_sig_params(struct x509_certificate *cert)
 {
+	struct public_key_signature *sig = cert->sig;
 	struct crypto_shash *tfm;
 	struct shash_desc *desc;
-	size_t digest_size, desc_size;
-	void *digest;
+	size_t desc_size;
 	int ret;
 
 	pr_devel("==>%s()\n", __func__);
 
-	if (cert->unsupported_crypto)
-		return -ENOPKG;
-	if (cert->sig.s)
-		return 0;
+	if (!cert->pub->pkey_algo)
+		cert->unsupported_key = true;
 
-	cert->sig.s = kmemdup(cert->raw_sig, cert->raw_sig_size,
-			      GFP_KERNEL);
-	if (!cert->sig.s)
+	if (!sig->pkey_algo)
+		cert->unsupported_sig = true;
+
+	/* We check the hash if we can - even if we can't then verify it */
+	if (!sig->hash_algo) {
+		cert->unsupported_sig = true;
+		return 0;
+	}
+
+	sig->s = kmemdup(cert->raw_sig, cert->raw_sig_size, GFP_KERNEL);
+	if (!sig->s)
 		return -ENOMEM;
 
-	cert->sig.s_size = cert->raw_sig_size;
+	sig->s_size = cert->raw_sig_size;
 
 	/* Allocate the hashing algorithm we're going to need and find out how
 	 * big the hash operational data will be.
 	 */
-	tfm = crypto_alloc_shash(cert->sig.hash_algo, 0, 0);
+	tfm = crypto_alloc_shash(sig->hash_algo, 0, 0);
 	if (IS_ERR(tfm)) {
 		if (PTR_ERR(tfm) == -ENOENT) {
-			cert->unsupported_crypto = true;
-			return -ENOPKG;
+			cert->unsupported_sig = true;
+			return 0;
 		}
 		return PTR_ERR(tfm);
 	}
 
 	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
-	digest_size = crypto_shash_digestsize(tfm);
+	sig->digest_size = crypto_shash_digestsize(tfm);
 
-	/* We allocate the hash operational data storage on the end of the
-	 * digest storage space.
-	 */
 	ret = -ENOMEM;
-	digest = kzalloc(ALIGN(digest_size, __alignof__(*desc)) + desc_size,
-			 GFP_KERNEL);
-	if (!digest)
+	sig->digest = kmalloc(sig->digest_size, GFP_KERNEL);
+	if (!sig->digest)
 		goto error;
 
-	cert->sig.digest = digest;
-	cert->sig.digest_size = digest_size;
+	desc = kzalloc(desc_size, GFP_KERNEL);
+	if (!desc)
+		goto error;
 
-	desc = PTR_ALIGN(digest + digest_size, __alignof__(*desc));
 	desc->tfm = tfm;
 	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	ret = crypto_shash_init(desc);
 	if (ret < 0)
-		goto error;
+		goto error_2;
 	might_sleep();
-	ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, digest);
+	ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest);
+
+error_2:
+	kfree(desc);
 error:
 	crypto_free_shash(tfm);
 	pr_devel("<==%s() = %d\n", __func__, ret);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(x509_get_sig_params);
 
 /*
- * Check the signature on a certificate using the provided public key
+ * Check for self-signedness in an X.509 cert and if found, check the signature
+ * immediately if we can.
  */
-int x509_check_signature(const struct public_key *pub,
-			 struct x509_certificate *cert)
+int x509_check_for_self_signed(struct x509_certificate *cert)
 {
-	int ret;
+	int ret = 0;
 
 	pr_devel("==>%s()\n", __func__);
 
-	ret = x509_get_sig_params(cert);
-	if (ret < 0)
-		return ret;
+	if (cert->raw_subject_size != cert->raw_issuer_size ||
+	    memcmp(cert->raw_subject, cert->raw_issuer,
+		   cert->raw_issuer_size) != 0)
+		goto not_self_signed;
 
-	ret = public_key_verify_signature(pub, &cert->sig);
-	if (ret == -ENOPKG)
-		cert->unsupported_crypto = true;
-	pr_debug("Cert Verification: %d\n", ret);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(x509_check_signature);
+	if (cert->sig->auth_ids[0] || cert->sig->auth_ids[1]) {
+		/* If the AKID is present it may have one or two parts.  If
+		 * both are supplied, both must match.
+		 */
+		bool a = asymmetric_key_id_same(cert->skid, cert->sig->auth_ids[1]);
+		bool b = asymmetric_key_id_same(cert->id, cert->sig->auth_ids[0]);
 
-/*
- * Check the new certificate against the ones in the trust keyring.  If one of
- * those is the signing key and validates the new certificate, then mark the
- * new certificate as being trusted.
- *
- * Return 0 if the new certificate was successfully validated, 1 if we couldn't
- * find a matching parent certificate in the trusted list and an error if there
- * is a matching certificate but the signature check fails.
- */
-static int x509_validate_trust(struct x509_certificate *cert,
-			       struct key *trust_keyring)
-{
-	struct key *key;
-	int ret = 1;
+		if (!a && !b)
+			goto not_self_signed;
 
-	if (!trust_keyring)
-		return -EOPNOTSUPP;
-
-	if (ca_keyid && !asymmetric_key_id_partial(cert->akid_skid, ca_keyid))
-		return -EPERM;
-
-	key = x509_request_asymmetric_key(trust_keyring,
-					  cert->akid_id, cert->akid_skid,
-					  false);
-	if (!IS_ERR(key))  {
-		if (!use_builtin_keys
-		    || test_bit(KEY_FLAG_BUILTIN, &key->flags))
-			ret = x509_check_signature(key->payload.data[asym_crypto],
-						   cert);
-		key_put(key);
+		ret = -EKEYREJECTED;
+		if (((a && !b) || (b && !a)) &&
+		    cert->sig->auth_ids[0] && cert->sig->auth_ids[1])
+			goto out;
 	}
+
+	ret = -EKEYREJECTED;
+	if (cert->pub->pkey_algo != cert->sig->pkey_algo)
+		goto out;
+
+	ret = public_key_verify_signature(cert->pub, cert->sig);
+	if (ret < 0) {
+		if (ret == -ENOPKG) {
+			cert->unsupported_sig = true;
+			ret = 0;
+		}
+		goto out;
+	}
+
+	pr_devel("Cert Self-signature verified");
+	cert->self_signed = true;
+
+out:
+	pr_devel("<==%s() = %d\n", __func__, ret);
 	return ret;
+
+not_self_signed:
+	pr_devel("<==%s() = 0 [not]\n", __func__);
+	return 0;
 }
 
 /*
@@ -291,34 +168,22 @@
 	pr_devel("Cert Issuer: %s\n", cert->issuer);
 	pr_devel("Cert Subject: %s\n", cert->subject);
 
-	if (!cert->pub->pkey_algo ||
-	    !cert->sig.pkey_algo ||
-	    !cert->sig.hash_algo) {
+	if (cert->unsupported_key) {
 		ret = -ENOPKG;
 		goto error_free_cert;
 	}
 
 	pr_devel("Cert Key Algo: %s\n", cert->pub->pkey_algo);
 	pr_devel("Cert Valid period: %lld-%lld\n", cert->valid_from, cert->valid_to);
-	pr_devel("Cert Signature: %s + %s\n",
-		 cert->sig.pkey_algo,
-		 cert->sig.hash_algo);
 
 	cert->pub->id_type = "X509";
 
-	/* Check the signature on the key if it appears to be self-signed */
-	if ((!cert->akid_skid && !cert->akid_id) ||
-	    asymmetric_key_id_same(cert->skid, cert->akid_skid) ||
-	    asymmetric_key_id_same(cert->id, cert->akid_id)) {
-		ret = x509_check_signature(cert->pub, cert); /* self-signed */
-		if (ret < 0)
-			goto error_free_cert;
-	} else if (!prep->trusted) {
-		ret = x509_validate_trust(cert, get_system_trusted_keyring());
-		if (ret)
-			ret = x509_validate_trust(cert, get_ima_mok_keyring());
-		if (!ret)
-			prep->trusted = 1;
+	if (cert->unsupported_sig) {
+		public_key_signature_free(cert->sig);
+		cert->sig = NULL;
+	} else {
+		pr_devel("Cert Signature: %s + %s\n",
+			 cert->sig->pkey_algo, cert->sig->hash_algo);
 	}
 
 	/* Propose a description */
@@ -353,6 +218,7 @@
 	prep->payload.data[asym_subtype] = &public_key_subtype;
 	prep->payload.data[asym_key_ids] = kids;
 	prep->payload.data[asym_crypto] = cert->pub;
+	prep->payload.data[asym_auth] = cert->sig;
 	prep->description = desc;
 	prep->quotalen = 100;
 
@@ -360,6 +226,7 @@
 	cert->pub = NULL;
 	cert->id = NULL;
 	cert->skid = NULL;
+	cert->sig = NULL;
 	desc = NULL;
 	ret = 0;
 
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index 9020349..2cb6f7e 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -201,8 +201,8 @@
 static struct dw_dma_slave sata_dwc_dma_dws = {
 	.src_id = 0,
 	.dst_id = 0,
-	.src_master = 0,
-	.dst_master = 1,
+	.m_master = 1,
+	.p_master = 0,
 };
 
 /*
@@ -1248,7 +1248,7 @@
 	hsdev->dma->dev = &ofdev->dev;
 
 	/* Initialize AHB DMAC */
-	err = dw_dma_probe(hsdev->dma, NULL);
+	err = dw_dma_probe(hsdev->dma);
 	if (err)
 		goto error_dma_iomap;
 
diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c
index f364fa4..72fe0a5 100644
--- a/drivers/bus/brcmstb_gisb.c
+++ b/drivers/bus/brcmstb_gisb.c
@@ -30,6 +30,10 @@
 #include <asm/signal.h>
 #endif
 
+#ifdef CONFIG_MIPS
+#include <asm/traps.h>
+#endif
+
 #define  ARB_ERR_CAP_CLEAR		(1 << 0)
 #define  ARB_ERR_CAP_STATUS_TIMEOUT	(1 << 12)
 #define  ARB_ERR_CAP_STATUS_TEA		(1 << 11)
@@ -238,6 +242,29 @@
 }
 #endif
 
+#ifdef CONFIG_MIPS
+static int brcmstb_bus_error_handler(struct pt_regs *regs, int is_fixup)
+{
+	int ret = 0;
+	struct brcmstb_gisb_arb_device *gdev;
+	u32 cap_status;
+
+	list_for_each_entry(gdev, &brcmstb_gisb_arb_device_list, next) {
+		cap_status = gisb_read(gdev, ARB_ERR_CAP_STATUS);
+
+		/* Invalid captured address, bail out */
+		if (!(cap_status & ARB_ERR_CAP_STATUS_VALID)) {
+			is_fixup = 1;
+			goto out;
+		}
+
+		ret |= brcmstb_gisb_arb_decode_addr(gdev, "bus error");
+	}
+out:
+	return is_fixup ? MIPS_BE_FIXUP : MIPS_BE_FATAL;
+}
+#endif
+
 static irqreturn_t brcmstb_gisb_timeout_handler(int irq, void *dev_id)
 {
 	brcmstb_gisb_arb_decode_addr(dev_id, "timeout");
@@ -355,6 +382,9 @@
 	hook_fault_code(22, brcmstb_bus_error_handler, SIGBUS, 0,
 			"imprecise external abort");
 #endif
+#ifdef CONFIG_MIPS
+	board_be_handler = brcmstb_bus_error_handler;
+#endif
 
 	dev_info(&pdev->dev, "registered mem: %p, irqs: %d, %d\n",
 			gdev->base, timeout_irq, tea_irq);
diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c
index 1c543ef..cad49bc 100644
--- a/drivers/bus/mips_cdmm.c
+++ b/drivers/bus/mips_cdmm.c
@@ -599,8 +599,8 @@
  * mips_cdmm_bus_down() - Tear down the CDMM bus.
  * @data:	Pointer to unsigned int CPU number.
  *
- * This work_on_cpu callback function is executed on a given CPU to call the
- * CDMM driver cpu_down callback for all devices on that CPU.
+ * This function is executed on the hotplugged CPU and calls the CDMM
+ * driver cpu_down callback for all devices on that CPU.
  */
 static long mips_cdmm_bus_down(void *data)
 {
@@ -630,7 +630,9 @@
  * CDMM devices on that CPU, or to call the CDMM driver cpu_up callback for all
  * devices already discovered on that CPU.
  *
- * It is used during initialisation and when CPUs are brought online.
+ * It is used as work_on_cpu callback function during
+ * initialisation. When CPUs are brought online the function is
+ * invoked directly on the hotplugged CPU.
  */
 static long mips_cdmm_bus_up(void *data)
 {
@@ -677,10 +679,10 @@
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
-		work_on_cpu(cpu, mips_cdmm_bus_up, &cpu);
+		mips_cdmm_bus_up(&cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-		work_on_cpu(cpu, mips_cdmm_bus_down, &cpu);
+		mips_cdmm_bus_down(&cpu);
 		break;
 	default:
 		return NOTIFY_DONE;
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index c455549..90518cd 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -197,6 +197,9 @@
 	---help---
 	  Support for the Marvell PXA SoC.
 
+config COMMON_CLK_PIC32
+	def_bool COMMON_CLK && MACH_PIC32
+
 source "drivers/clk/bcm/Kconfig"
 source "drivers/clk/hisilicon/Kconfig"
 source "drivers/clk/mvebu/Kconfig"
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 46869d6..18e64bb 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -58,6 +58,7 @@
 obj-$(CONFIG_MACH_INGENIC)		+= ingenic/
 obj-$(CONFIG_COMMON_CLK_KEYSTONE)	+= keystone/
 obj-$(CONFIG_ARCH_MEDIATEK)		+= mediatek/
+obj-$(CONFIG_MACH_PIC32)		+= microchip/
 ifeq ($(CONFIG_COMMON_CLK), y)
 obj-$(CONFIG_ARCH_MMP)			+= mmp/
 endif
diff --git a/drivers/clk/microchip/Makefile b/drivers/clk/microchip/Makefile
new file mode 100644
index 0000000..2152f41
--- /dev/null
+++ b/drivers/clk/microchip/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_COMMON_CLK_PIC32) += clk-core.o
+obj-$(CONFIG_PIC32MZDA) += clk-pic32mzda.o
diff --git a/drivers/clk/microchip/clk-core.c b/drivers/clk/microchip/clk-core.c
new file mode 100644
index 0000000..ca85cea
--- /dev/null
+++ b/drivers/clk/microchip/clk-core.c
@@ -0,0 +1,1031 @@
+/*
+ * Purna Chandra Mandal,<purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <asm/mach-pic32/pic32.h>
+#include <asm/traps.h>
+
+#include "clk-core.h"
+
+/* OSCCON Reg fields */
+#define OSC_CUR_MASK		0x07
+#define OSC_CUR_SHIFT		12
+#define OSC_NEW_MASK		0x07
+#define OSC_NEW_SHIFT		8
+#define OSC_SWEN		BIT(0)
+
+/* SPLLCON Reg fields */
+#define PLL_RANGE_MASK		0x07
+#define PLL_RANGE_SHIFT		0
+#define PLL_ICLK_MASK		0x01
+#define PLL_ICLK_SHIFT		7
+#define PLL_IDIV_MASK		0x07
+#define PLL_IDIV_SHIFT		8
+#define PLL_ODIV_MASK		0x07
+#define PLL_ODIV_SHIFT		24
+#define PLL_MULT_MASK		0x7F
+#define PLL_MULT_SHIFT		16
+#define PLL_MULT_MAX		128
+#define PLL_ODIV_MIN		1
+#define PLL_ODIV_MAX		5
+
+/* Peripheral Bus Clock Reg Fields */
+#define PB_DIV_MASK		0x7f
+#define PB_DIV_SHIFT		0
+#define PB_DIV_READY		BIT(11)
+#define PB_DIV_ENABLE		BIT(15)
+#define PB_DIV_MAX		128
+#define PB_DIV_MIN		0
+
+/* Reference Oscillator Control Reg fields */
+#define REFO_SEL_MASK		0x0f
+#define REFO_SEL_SHIFT		0
+#define REFO_ACTIVE		BIT(8)
+#define REFO_DIVSW_EN		BIT(9)
+#define REFO_OE			BIT(12)
+#define REFO_ON			BIT(15)
+#define REFO_DIV_SHIFT		16
+#define REFO_DIV_MASK		0x7fff
+
+/* Reference Oscillator Trim Register Fields */
+#define REFO_TRIM_REG		0x10
+#define REFO_TRIM_MASK		0x1ff
+#define REFO_TRIM_SHIFT		23
+#define REFO_TRIM_MAX		511
+
+/* Mux Slew Control Register fields */
+#define SLEW_BUSY		BIT(0)
+#define SLEW_DOWNEN		BIT(1)
+#define SLEW_UPEN		BIT(2)
+#define SLEW_DIV		0x07
+#define SLEW_DIV_SHIFT		8
+#define SLEW_SYSDIV		0x0f
+#define SLEW_SYSDIV_SHIFT	20
+
+/* Clock Poll Timeout */
+#define LOCK_TIMEOUT_US         USEC_PER_MSEC
+
+/* SoC specific clock needed during SPLL clock rate switch */
+static struct clk_hw *pic32_sclk_hw;
+
+/* add instruction pipeline delay while CPU clock is in-transition. */
+#define cpu_nop5()			\
+do {					\
+	__asm__ __volatile__("nop");	\
+	__asm__ __volatile__("nop");	\
+	__asm__ __volatile__("nop");	\
+	__asm__ __volatile__("nop");	\
+	__asm__ __volatile__("nop");	\
+} while (0)
+
+/* Peripheral bus clocks */
+struct pic32_periph_clk {
+	struct clk_hw hw;
+	void __iomem *ctrl_reg;
+	struct pic32_clk_common *core;
+};
+
+#define clkhw_to_pbclk(_hw)	container_of(_hw, struct pic32_periph_clk, hw)
+
+static int pbclk_is_enabled(struct clk_hw *hw)
+{
+	struct pic32_periph_clk *pb = clkhw_to_pbclk(hw);
+
+	return readl(pb->ctrl_reg) & PB_DIV_ENABLE;
+}
+
+static int pbclk_enable(struct clk_hw *hw)
+{
+	struct pic32_periph_clk *pb = clkhw_to_pbclk(hw);
+
+	writel(PB_DIV_ENABLE, PIC32_SET(pb->ctrl_reg));
+	return 0;
+}
+
+static void pbclk_disable(struct clk_hw *hw)
+{
+	struct pic32_periph_clk *pb = clkhw_to_pbclk(hw);
+
+	writel(PB_DIV_ENABLE, PIC32_CLR(pb->ctrl_reg));
+}
+
+static unsigned long calc_best_divided_rate(unsigned long rate,
+					    unsigned long parent_rate,
+					    u32 divider_max,
+					    u32 divider_min)
+{
+	unsigned long divided_rate, divided_rate_down;
+	unsigned long div, div_up;
+
+	/* eq. clk_rate = parent_rate / divider; pick the divider (floor or
+	 * floor + 1) whose output is closest to @rate. A divider of zero is
+	 * never valid, so a zero @divider_min is treated as one, and a zero
+	 * @rate selects the largest divider instead of dividing by zero.
+	 */
+	divider_min = max_t(u32, divider_min, 1);
+	div = rate ? parent_rate / rate : divider_max;
+	div = clamp_val(div, divider_min, divider_max);
+	div_up = clamp_val(div + 1, divider_min, divider_max);
+
+	divided_rate = parent_rate / div;
+	divided_rate_down = parent_rate / div_up;
+	if (abs(rate - divided_rate_down) < abs(rate - divided_rate))
+		return divided_rate_down;
+
+	return divided_rate;
+}
+
+static inline u32 pbclk_read_pbdiv(struct pic32_periph_clk *pb)
+{
+	return ((readl(pb->ctrl_reg) >> PB_DIV_SHIFT) & PB_DIV_MASK) + 1;
+}
+
+static unsigned long pbclk_recalc_rate(struct clk_hw *hw,
+				       unsigned long parent_rate)
+{
+	struct pic32_periph_clk *pb = clkhw_to_pbclk(hw);
+
+	return parent_rate / pbclk_read_pbdiv(pb);
+}
+
+static long pbclk_round_rate(struct clk_hw *hw, unsigned long rate,
+			     unsigned long *parent_rate)
+{
+	return calc_best_divided_rate(rate, *parent_rate,
+				      PB_DIV_MAX, PB_DIV_MIN);
+}
+
+static int pbclk_set_rate(struct clk_hw *hw, unsigned long rate,
+			  unsigned long parent_rate)
+{
+	struct pic32_periph_clk *pb = clkhw_to_pbclk(hw);
+	unsigned long flags;
+	u32 v, div;
+	int err;
+
+	if (!rate)
+		return -EINVAL;
+
+	/* check & wait for DIV_READY */
+	err = readl_poll_timeout(pb->ctrl_reg, v, v & PB_DIV_READY,
+				 1, LOCK_TIMEOUT_US);
+	if (err)
+		return err;
+
+	/* nearest divider, bounded to the 1..PB_DIV_MAX the field encodes */
+	div = clamp_val(DIV_ROUND_CLOSEST(parent_rate, rate), 1, PB_DIV_MAX);
+
+	spin_lock_irqsave(&pb->core->reg_lock, flags);
+	/* apply new div; the register field stores (divider - 1) */
+	v = readl(pb->ctrl_reg);
+	v &= ~PB_DIV_MASK;
+	v |= (div - 1);
+	pic32_syskey_unlock();
+	writel(v, pb->ctrl_reg);
+
+	spin_unlock_irqrestore(&pb->core->reg_lock, flags);
+
+	/* wait again, for pbdivready */
+	err = readl_poll_timeout_atomic(pb->ctrl_reg, v, v & PB_DIV_READY,
+					1, LOCK_TIMEOUT_US);
+	if (err)
+		return err;
+
+	/* confirm that new div is applied correctly */
+	return (pbclk_read_pbdiv(pb) == div) ? 0 : -EBUSY;
+}
+
+const struct clk_ops pic32_pbclk_ops = {
+	.enable		= pbclk_enable,
+	.disable	= pbclk_disable,
+	.is_enabled	= pbclk_is_enabled,
+	.recalc_rate	= pbclk_recalc_rate,
+	.round_rate	= pbclk_round_rate,
+	.set_rate	= pbclk_set_rate,
+};
+
+struct clk *pic32_periph_clk_register(const struct pic32_periph_clk_data *desc,
+				      struct pic32_clk_common *core)
+{
+	struct pic32_periph_clk *pbclk;
+	struct clk *clk;
+
+	pbclk = devm_kzalloc(core->dev, sizeof(*pbclk), GFP_KERNEL);
+	if (!pbclk)
+		return ERR_PTR(-ENOMEM);
+
+	pbclk->hw.init = &desc->init_data;
+	pbclk->core = core;
+	pbclk->ctrl_reg = desc->ctrl_reg + core->iobase;
+
+	clk = devm_clk_register(core->dev, &pbclk->hw);
+	if (IS_ERR(clk)) {
+		dev_err(core->dev, "%s: clk_register() failed\n", __func__);
+		devm_kfree(core->dev, pbclk);
+	}
+
+	return clk;
+}
+
+/* Reference oscillator operations */
+struct pic32_ref_osc {
+	struct clk_hw hw;
+	void __iomem *ctrl_reg;
+	const u32 *parent_map;
+	struct pic32_clk_common *core;
+};
+
+#define clkhw_to_refosc(_hw)	container_of(_hw, struct pic32_ref_osc, hw)
+
+static int roclk_is_enabled(struct clk_hw *hw)
+{
+	struct pic32_ref_osc *refo = clkhw_to_refosc(hw);
+
+	return readl(refo->ctrl_reg) & REFO_ON;
+}
+
+static int roclk_enable(struct clk_hw *hw)
+{
+	struct pic32_ref_osc *refo = clkhw_to_refosc(hw);
+
+	writel(REFO_ON | REFO_OE, PIC32_SET(refo->ctrl_reg));
+	return 0;
+}
+
+static void roclk_disable(struct clk_hw *hw)
+{
+	struct pic32_ref_osc *refo = clkhw_to_refosc(hw);
+
+	writel(REFO_ON | REFO_OE, PIC32_CLR(refo->ctrl_reg));
+}
+
+static void roclk_init(struct clk_hw *hw)
+{
+	/* initialize clock in disabled state */
+	roclk_disable(hw);
+}
+
+static u8 roclk_get_parent(struct clk_hw *hw)
+{
+	struct pic32_ref_osc *refo = clkhw_to_refosc(hw);
+	u32 v, i;
+
+	v = (readl(refo->ctrl_reg) >> REFO_SEL_SHIFT) & REFO_SEL_MASK;
+
+	if (!refo->parent_map)
+		return v;
+
+	for (i = 0; i < clk_hw_get_num_parents(hw); i++)
+		if (refo->parent_map[i] == v)
+			return i;
+
+	return -EINVAL;
+}
+
+static unsigned long roclk_calc_rate(unsigned long parent_rate,
+				     u32 rodiv, u32 rotrim)
+{
+	u64 scaled;
+	u32 divisor;
+
+	/* Output frequency for the given divider and trim:
+	 *   fout = fin / [2 * {div + (trim / 512)}]
+	 *	  = (fin << 8) / ((div << 9) + trim)
+	 */
+	if (!rotrim) {
+		/* no trim: plain fin / (2 * div), or fin itself when div is 0 */
+		return rodiv ? parent_rate / (rodiv << 1) : parent_rate;
+	}
+
+	/* trim in use: evaluate the scaled form with a 64-bit numerator */
+	divisor = (rodiv << 9) + rotrim;
+	scaled = parent_rate;
+	scaled <<= 8;
+	do_div(scaled, divisor);
+	return scaled;
+}
+
+static void roclk_calc_div_trim(unsigned long rate,
+				unsigned long parent_rate,
+				u32 *rodiv_p, u32 *rotrim_p)
+{
+	u32 div, rotrim, rodiv;
+	u64 frac;
+
+	/* Find integer approximation of floating-point arithmetic.
+	 *      fout = fin / [2 * {rodiv + (rotrim / 512)}] ... (1)
+	 * i.e. fout = fin / (2 * DIV)
+	 *      whereas DIV = rodiv + (rotrim / 512)
+	 *
+	 * The kernel cannot use floating point, so scale by 512 instead:
+	 * ie. fout = (fin * 256) / [(512 * rodiv) + rotrim]  ... from (1)
+	 * ie. rotrim = ((fin * 256) / fout) - (512 * rodiv)
+	 * A zero @rate cannot be divided by; it selects the largest divider.
+	 */
+	if (!rate) {
+		rodiv = REFO_DIV_MASK;
+		rotrim = REFO_TRIM_MAX;
+	} else if (parent_rate <= rate) {
+		rodiv = 0;
+		rotrim = 0;
+	} else {
+		div = parent_rate / (rate << 1);
+		frac = parent_rate;
+		frac <<= 8;
+		do_div(frac, rate);
+		frac -= (u64)div << 9; /* widen before shifting */
+
+		rodiv = (div > REFO_DIV_MASK) ? REFO_DIV_MASK : div;
+		rotrim = (frac >= REFO_TRIM_MAX) ? REFO_TRIM_MAX : frac;
+	}
+
+	if (rodiv_p)
+		*rodiv_p = rodiv;
+
+	if (rotrim_p)
+		*rotrim_p = rotrim;
+}
+
+static unsigned long roclk_recalc_rate(struct clk_hw *hw,
+				       unsigned long parent_rate)
+{
+	struct pic32_ref_osc *refo = clkhw_to_refosc(hw);
+	u32 v, rodiv, rotrim;
+
+	/* get rodiv */
+	v = readl(refo->ctrl_reg);
+	rodiv = (v >> REFO_DIV_SHIFT) & REFO_DIV_MASK;
+
+	/* get trim */
+	v = readl(refo->ctrl_reg + REFO_TRIM_REG);
+	rotrim = (v >> REFO_TRIM_SHIFT) & REFO_TRIM_MASK;
+
+	return roclk_calc_rate(parent_rate, rodiv, rotrim);
+}
+
+static long roclk_round_rate(struct clk_hw *hw, unsigned long rate,
+			     unsigned long *parent_rate)
+{
+	unsigned long fin = *parent_rate;
+	u32 div, trim;
+
+	/* derive div/trim for @rate and report the rate they really yield */
+	roclk_calc_div_trim(rate, fin, &div, &trim);
+
+	return roclk_calc_rate(fin, div, trim);
+}
+
+static int roclk_determine_rate(struct clk_hw *hw,
+				struct clk_rate_request *req)
+{
+	struct clk_hw *parent_clk, *best_parent_clk = NULL;
+	unsigned int i, delta, best_delta = -1;
+	unsigned long parent_rate, best_parent_rate = 0;
+	unsigned long best = 0, nearest_rate;
+
+	/* find a parent which can generate nearest clkrate >= rate */
+	for (i = 0; i < clk_hw_get_num_parents(hw); i++) {
+		/* get parent */
+		parent_clk = clk_hw_get_parent_by_index(hw, i);
+		if (!parent_clk)
+			continue;
+
+		/* skip if parent runs slower than target rate */
+		parent_rate = clk_hw_get_rate(parent_clk);
+		if (req->rate > parent_rate)
+			continue;
+
+		nearest_rate = roclk_round_rate(hw, req->rate, &parent_rate);
+		delta = abs(nearest_rate - req->rate);
+		if ((nearest_rate >= req->rate) && (delta < best_delta)) {
+			best_parent_clk = parent_clk;
+			best_parent_rate = parent_rate;
+			best = nearest_rate;
+			best_delta = delta;
+
+			if (delta == 0)
+				break;
+		}
+	}
+
+	/* if no match found, retain old rate. The clk core expects 0 or a
+	 * negative errno here, with the resulting rate reported via @req.
+	 */
+	if (!best_parent_clk) {
+		pr_err("%s:%s, no parent found for rate %lu.\n",
+		       __func__, clk_hw_get_name(hw), req->rate);
+		req->rate = clk_hw_get_rate(hw);
+		return 0;
+	}
+
+	pr_debug("%s,rate %lu, best_parent(%s, %lu), best %lu, delta %u\n",
+		 clk_hw_get_name(hw), req->rate,
+		 clk_hw_get_name(best_parent_clk), best_parent_rate,
+		 best, best_delta);
+
+	req->best_parent_rate = best_parent_rate;
+	req->best_parent_hw = best_parent_clk;
+	req->rate = best;
+	return 0;
+}
+
+static int roclk_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct pic32_ref_osc *refo = clkhw_to_refosc(hw);
+	unsigned long flags;
+	u32 v;
+	int err;
+
+	if (refo->parent_map)
+		index = refo->parent_map[index];
+
+	/* wait until ACTIVE bit is zero or timeout */
+	err = readl_poll_timeout(refo->ctrl_reg, v, !(v & REFO_ACTIVE),
+				 1, LOCK_TIMEOUT_US);
+	if (err) {
+		pr_err("%s: poll failed, clk active\n", clk_hw_get_name(hw));
+		return err;
+	}
+
+	spin_lock_irqsave(&refo->core->reg_lock, flags);
+
+	pic32_syskey_unlock();
+
+	/* calculate & apply new */
+	v = readl(refo->ctrl_reg);
+	v &= ~(REFO_SEL_MASK << REFO_SEL_SHIFT);
+	v |= index << REFO_SEL_SHIFT;
+
+	writel(v, refo->ctrl_reg);
+
+	spin_unlock_irqrestore(&refo->core->reg_lock, flags);
+
+	return 0;
+}
+
+static int roclk_set_rate_and_parent(struct clk_hw *hw,
+				     unsigned long rate,
+				     unsigned long parent_rate,
+				     u8 index)
+{
+	struct pic32_ref_osc *refo = clkhw_to_refosc(hw);
+	unsigned long flags;
+	u32 trim, rodiv, v;
+	int err;
+
+	/* calculate new rodiv & rotrim for new rate */
+	roclk_calc_div_trim(rate, parent_rate, &rodiv, &trim);
+
+	pr_debug("parent_rate = %lu, rate = %lu, div = %d, trim = %d\n",
+		 parent_rate, rate, rodiv, trim);
+
+	/* wait till source change is active */
+	err = readl_poll_timeout(refo->ctrl_reg, v,
+				 !(v & (REFO_ACTIVE | REFO_DIVSW_EN)),
+				 1, LOCK_TIMEOUT_US);
+	if (err) {
+		pr_err("%s: poll timedout, clock is still active\n", __func__);
+		return err;
+	}
+
+	spin_lock_irqsave(&refo->core->reg_lock, flags);
+	v = readl(refo->ctrl_reg);
+
+	pic32_syskey_unlock();
+
+	/* apply parent, if required */
+	if (refo->parent_map)
+		index = refo->parent_map[index];
+
+	v &= ~(REFO_SEL_MASK << REFO_SEL_SHIFT);
+	v |= index << REFO_SEL_SHIFT;
+
+	/* apply RODIV */
+	v &= ~(REFO_DIV_MASK << REFO_DIV_SHIFT);
+	v |= rodiv << REFO_DIV_SHIFT;
+	writel(v, refo->ctrl_reg);
+
+	/* apply ROTRIM */
+	v = readl(refo->ctrl_reg + REFO_TRIM_REG);
+	v &= ~(REFO_TRIM_MASK << REFO_TRIM_SHIFT);
+	v |= trim << REFO_TRIM_SHIFT;
+	writel(v, refo->ctrl_reg + REFO_TRIM_REG);
+
+	/* enable & activate divider switching */
+	writel(REFO_ON | REFO_DIVSW_EN, PIC32_SET(refo->ctrl_reg));
+
+	/* wait till divswen is in-progress */
+	err = readl_poll_timeout_atomic(refo->ctrl_reg, v, !(v & REFO_DIVSW_EN),
+					1, LOCK_TIMEOUT_US);
+	/* leave the clk gated as it was */
+	writel(REFO_ON, PIC32_CLR(refo->ctrl_reg));
+
+	spin_unlock_irqrestore(&refo->core->reg_lock, flags);
+
+	return err;
+}
+
+static int roclk_set_rate(struct clk_hw *hw, unsigned long rate,
+			  unsigned long parent_rate)
+{
+	u8 index = roclk_get_parent(hw);
+
+	return roclk_set_rate_and_parent(hw, rate, parent_rate, index);
+}
+
+const struct clk_ops pic32_roclk_ops = {
+	.enable			= roclk_enable,
+	.disable		= roclk_disable,
+	.is_enabled		= roclk_is_enabled,
+	.get_parent		= roclk_get_parent,
+	.set_parent		= roclk_set_parent,
+	.determine_rate		= roclk_determine_rate,
+	.recalc_rate		= roclk_recalc_rate,
+	.set_rate_and_parent	= roclk_set_rate_and_parent,
+	.set_rate		= roclk_set_rate,
+	.init			= roclk_init,
+};
+
+struct clk *pic32_refo_clk_register(const struct pic32_ref_osc_data *data,
+				    struct pic32_clk_common *core)
+{
+	struct pic32_ref_osc *refo;
+	struct clk *clk;
+
+	refo = devm_kzalloc(core->dev, sizeof(*refo), GFP_KERNEL);
+	if (!refo)
+		return ERR_PTR(-ENOMEM);
+
+	refo->core = core;
+	refo->hw.init = &data->init_data;
+	refo->ctrl_reg = data->ctrl_reg + core->iobase;
+	refo->parent_map = data->parent_map;
+
+	clk = devm_clk_register(core->dev, &refo->hw);
+	if (IS_ERR(clk))
+		dev_err(core->dev, "%s: clk_register() failed\n", __func__);
+
+	return clk;
+}
+
+struct pic32_sys_pll {
+	struct clk_hw hw;
+	void __iomem *ctrl_reg;
+	void __iomem *status_reg;
+	u32 lock_mask;
+	u32 idiv; /* PLL iclk divider, treated fixed */
+	struct pic32_clk_common *core;
+};
+
+#define clkhw_to_spll(_hw)	container_of(_hw, struct pic32_sys_pll, hw)
+
+static inline u32 spll_odiv_to_divider(u32 odiv)
+{
+	u32 exponent = clamp_val(odiv, PLL_ODIV_MIN, PLL_ODIV_MAX);
+
+	return 1 << exponent; /* divider is 2^ODIV, ODIV bounded to 1..5 */
+}
+
+static unsigned long spll_calc_mult_div(struct pic32_sys_pll *pll,
+					unsigned long rate,
+					unsigned long parent_rate,
+					u32 *mult_p, u32 *odiv_p)
+{
+	u32 mul, div, best_mul = 1, best_div = 1;
+	unsigned long new_rate, best_rate = rate;
+	unsigned int best_delta = -1, delta, match_found = 0;
+	u64 rate64;
+
+	parent_rate /= pll->idiv;
+
+	for (mul = 1; mul <= PLL_MULT_MAX; mul++) {
+		for (div = PLL_ODIV_MIN; div <= PLL_ODIV_MAX; div++) {
+			rate64 = parent_rate;
+			rate64 *= mul;
+			do_div(rate64, 1 << div);
+			new_rate = rate64;
+			delta = abs(rate - new_rate);
+			if ((new_rate >= rate) && (delta < best_delta)) {
+				best_delta = delta;
+				best_rate = new_rate;
+				best_mul = mul;
+				best_div = div;
+				match_found = 1;
+			}
+		}
+	}
+
+	if (!match_found) {
+		pr_warn("spll: no match found\n");
+		return 0;
+	}
+
+	pr_debug("rate %lu, par_rate %lu/mult %u, div %u, best_rate %lu\n",
+		 rate, parent_rate, best_mul, best_div, best_rate);
+
+	if (mult_p)
+		*mult_p = best_mul - 1;
+
+	if (odiv_p)
+		*odiv_p = best_div;
+
+	return best_rate;
+}
+
+static unsigned long spll_clk_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct pic32_sys_pll *pll = clkhw_to_spll(hw);
+	unsigned long pll_in_rate;
+	u32 mult, odiv, div, v;
+	u64 rate64;
+
+	v = readl(pll->ctrl_reg);
+	odiv = ((v >> PLL_ODIV_SHIFT) & PLL_ODIV_MASK);
+	mult = ((v >> PLL_MULT_SHIFT) & PLL_MULT_MASK) + 1;
+	div = spll_odiv_to_divider(odiv);
+
+	/* pll_in_rate = parent_rate / idiv
+	 * pll_out_rate = pll_in_rate * mult / div;
+	 */
+	pll_in_rate = parent_rate / pll->idiv;
+	rate64 = pll_in_rate;
+	rate64 *= mult;
+	do_div(rate64, div);
+
+	return rate64;
+}
+
+static long spll_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *parent_rate)
+{
+	struct pic32_sys_pll *pll = clkhw_to_spll(hw);
+
+	return spll_calc_mult_div(pll, rate, *parent_rate, NULL, NULL);
+}
+
+static int spll_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+			     unsigned long parent_rate)
+{
+	struct pic32_sys_pll *pll = clkhw_to_spll(hw);
+	unsigned long ret, flags;
+	u32 mult, odiv, v;
+	int err;
+
+	ret = spll_calc_mult_div(pll, rate, parent_rate, &mult, &odiv);
+	if (!ret)
+		return -EINVAL;
+
+	/*
+	 * We can't change SPLL counters when it is in-active use
+	 * by SYSCLK. So check before applying new counters/rate.
+	 */
+
+	/* Is spll_clk active parent of sys_clk ? */
+	if (unlikely(clk_hw_get_parent(pic32_sclk_hw) == hw)) {
+		pr_err("%s: failed, clk in-use\n", __func__);
+		return -EBUSY;
+	}
+
+	spin_lock_irqsave(&pll->core->reg_lock, flags);
+
+	/* apply new multiplier & divisor */
+	v = readl(pll->ctrl_reg);
+	v &= ~(PLL_MULT_MASK << PLL_MULT_SHIFT);
+	v &= ~(PLL_ODIV_MASK << PLL_ODIV_SHIFT);
+	v |= (mult << PLL_MULT_SHIFT) | (odiv << PLL_ODIV_SHIFT);
+
+	/* sys unlock before write */
+	pic32_syskey_unlock();
+
+	writel(v, pll->ctrl_reg);
+	cpu_relax();
+
+	/* insert few nops (5-stage) to ensure CPU does not hang */
+	cpu_nop5();
+	cpu_nop5();
+
+	/* Wait until PLL is locked (maximum 100 usecs). */
+	err = readl_poll_timeout_atomic(pll->status_reg, v,
+					v & pll->lock_mask, 1, 100);
+	spin_unlock_irqrestore(&pll->core->reg_lock, flags);
+
+	return err;
+}
+
+/* SPLL clock operation */
+const struct clk_ops pic32_spll_ops = {
+	.recalc_rate	= spll_clk_recalc_rate,
+	.round_rate	= spll_clk_round_rate,
+	.set_rate	= spll_clk_set_rate,
+};
+
+struct clk *pic32_spll_clk_register(const struct pic32_sys_pll_data *data,
+				    struct pic32_clk_common *core)
+{
+	struct pic32_sys_pll *spll;
+	struct clk *clk;
+
+	spll = devm_kzalloc(core->dev, sizeof(*spll), GFP_KERNEL);
+	if (!spll)
+		return ERR_PTR(-ENOMEM);
+
+	spll->core = core;
+	spll->hw.init = &data->init_data;
+	spll->ctrl_reg = data->ctrl_reg + core->iobase;
+	spll->status_reg = data->status_reg + core->iobase;
+	spll->lock_mask = data->lock_mask;
+
+	/* cache PLL idiv; PLL driver uses it as constant.*/
+	spll->idiv = (readl(spll->ctrl_reg) >> PLL_IDIV_SHIFT) & PLL_IDIV_MASK;
+	spll->idiv += 1;
+
+	clk = devm_clk_register(core->dev, &spll->hw);
+	if (IS_ERR(clk))
+		dev_err(core->dev, "sys_pll: clk_register() failed\n");
+
+	return clk;
+}
+
+/* System mux clock(aka SCLK) */
+
+struct pic32_sys_clk {
+	struct clk_hw hw;
+	void __iomem *mux_reg;
+	void __iomem *slew_reg;
+	u32 slew_div;
+	const u32 *parent_map;
+	struct pic32_clk_common *core;
+};
+
+#define clkhw_to_sys_clk(_hw)	container_of(_hw, struct pic32_sys_clk, hw)
+
+static unsigned long sclk_get_rate(struct clk_hw *hw, unsigned long parent_rate)
+{
+	struct pic32_sys_clk *sclk = clkhw_to_sys_clk(hw);
+	u32 sysdiv = readl(sclk->slew_reg) >> SLEW_SYSDIV_SHIFT;
+
+	/* the register field holds (divider - 1) */
+	sysdiv &= SLEW_SYSDIV;
+
+	return parent_rate / (sysdiv + 1);
+}
+
+static long sclk_round_rate(struct clk_hw *hw, unsigned long rate,
+			    unsigned long *parent_rate)
+{
+	return calc_best_divided_rate(rate, *parent_rate, SLEW_SYSDIV, 1);
+}
+
+static int sclk_set_rate(struct clk_hw *hw,
+			 unsigned long rate, unsigned long parent_rate)
+{
+	struct pic32_sys_clk *sclk = clkhw_to_sys_clk(hw);
+	unsigned long flags;
+	u32 v, div;
+	int err;
+
+	if (!rate)
+		return -EINVAL;
+
+	/* SYSDIV holds (divider - 1): bound divider to the 1..16 it encodes */
+	div = clamp_val(parent_rate / rate, 1, SLEW_SYSDIV + 1);
+
+	spin_lock_irqsave(&sclk->core->reg_lock, flags);
+	v = readl(sclk->slew_reg);
+	v &= ~(SLEW_SYSDIV << SLEW_SYSDIV_SHIFT);
+	v |= (div - 1) << SLEW_SYSDIV_SHIFT;
+
+	pic32_syskey_unlock();
+	writel(v, sclk->slew_reg);
+
+	/* wait until BUSY is cleared */
+	err = readl_poll_timeout_atomic(sclk->slew_reg, v,
+					!(v & SLEW_BUSY), 1, LOCK_TIMEOUT_US);
+	spin_unlock_irqrestore(&sclk->core->reg_lock, flags);
+
+	return err;
+}
+
+static u8 sclk_get_parent(struct clk_hw *hw)
+{
+	struct pic32_sys_clk *sclk = clkhw_to_sys_clk(hw);
+	u32 i, v;
+
+	v = (readl(sclk->mux_reg) >> OSC_CUR_SHIFT) & OSC_CUR_MASK;
+
+	if (!sclk->parent_map)
+		return v;
+
+	for (i = 0; i < clk_hw_get_num_parents(hw); i++)
+		if (sclk->parent_map[i] == v)
+			return i;
+	return -EINVAL;
+}
+
+static int sclk_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct pic32_sys_clk *sclk = clkhw_to_sys_clk(hw);
+	unsigned long flags;
+	u32 nosc, cosc, v;
+	int err;
+
+	spin_lock_irqsave(&sclk->core->reg_lock, flags);
+
+	/* translate the parent index into the hardware selector value */
+	nosc = sclk->parent_map ? sclk->parent_map[index] : index;
+
+	/* stage it in the OSC_NEW field of the mux register */
+	v = readl(sclk->mux_reg);
+	v &= ~(OSC_NEW_MASK << OSC_NEW_SHIFT);
+	v |= nosc << OSC_NEW_SHIFT;
+
+	pic32_syskey_unlock();
+
+	writel(v, sclk->mux_reg);
+
+	/* initiate switch */
+	writel(OSC_SWEN, PIC32_SET(sclk->mux_reg));
+	cpu_relax();
+
+	/* add nop to flush pipeline (as cpu_clk is in-flux) */
+	cpu_nop5();
+
+	/* wait for SWEN to clear. NOTE(review): polls slew_reg - confirm */
+	err = readl_poll_timeout_atomic(sclk->slew_reg, v,
+					!(v & OSC_SWEN), 1, LOCK_TIMEOUT_US);
+
+	spin_unlock_irqrestore(&sclk->core->reg_lock, flags);
+
+	/*
+	 * SCLK clock-switching logic might reject a clock switching request
+	 * if pre-requisites (like new clk_src not present or unstable) are
+	 * not met.
+	 * So confirm before claiming success.
+	 */
+	cosc = (readl(sclk->mux_reg) >> OSC_CUR_SHIFT) & OSC_CUR_MASK;
+	if (cosc != nosc) {
+		pr_err("%s: err, failed to set_parent() to %d, current %d\n",
+		       clk_hw_get_name(hw), nosc, cosc);
+		err = -EBUSY;
+	}
+
+	return err;
+}
+
+static void sclk_init(struct clk_hw *hw)
+{
+	struct pic32_sys_clk *sclk = clkhw_to_sys_clk(hw);
+	unsigned long flags;
+	u32 v;
+
+	/* Maintain reference to this clk, required in spll_clk_set_rate() */
+	pic32_sclk_hw = hw;
+
+	/* apply slew divider on both up and down scaling */
+	if (sclk->slew_div) {
+		spin_lock_irqsave(&sclk->core->reg_lock, flags);
+		v = readl(sclk->slew_reg);
+		v &= ~(SLEW_DIV << SLEW_DIV_SHIFT);
+		v |= sclk->slew_div << SLEW_DIV_SHIFT;
+		v |= SLEW_DOWNEN | SLEW_UPEN;
+		writel(v, sclk->slew_reg);
+		spin_unlock_irqrestore(&sclk->core->reg_lock, flags);
+	}
+}
+
+/* sclk with post-divider */
+const struct clk_ops pic32_sclk_ops = {
+	.get_parent	= sclk_get_parent,
+	.set_parent	= sclk_set_parent,
+	.round_rate	= sclk_round_rate,
+	.set_rate	= sclk_set_rate,
+	.recalc_rate	= sclk_get_rate,
+	.init		= sclk_init,
+	.determine_rate = __clk_mux_determine_rate,
+};
+
+/* sclk with no slew and no post-divider */
+const struct clk_ops pic32_sclk_no_div_ops = {
+	.get_parent	= sclk_get_parent,
+	.set_parent	= sclk_set_parent,
+	.init		= sclk_init,
+	.determine_rate = __clk_mux_determine_rate,
+};
+
+struct clk *pic32_sys_clk_register(const struct pic32_sys_clk_data *data,
+				   struct pic32_clk_common *core)
+{
+	struct pic32_sys_clk *sclk;
+	struct clk *clk;
+
+	sclk = devm_kzalloc(core->dev, sizeof(*sclk), GFP_KERNEL);
+	if (!sclk)
+		return ERR_PTR(-ENOMEM);
+
+	sclk->core = core;
+	sclk->hw.init = &data->init_data;
+	sclk->mux_reg = data->mux_reg + core->iobase;
+	sclk->slew_reg = data->slew_reg + core->iobase;
+	sclk->slew_div = data->slew_div;
+	sclk->parent_map = data->parent_map;
+
+	clk = devm_clk_register(core->dev, &sclk->hw);
+	if (IS_ERR(clk))
+		dev_err(core->dev, "%s: clk register failed\n", __func__);
+
+	return clk;
+}
+
+/* secondary oscillator */
+struct pic32_sec_osc {
+	struct clk_hw hw;
+	void __iomem *enable_reg;
+	void __iomem *status_reg;
+	u32 enable_mask;
+	u32 status_mask;
+	unsigned long fixed_rate;
+	struct pic32_clk_common *core;
+};
+
+#define clkhw_to_sosc(_hw)	container_of(_hw, struct pic32_sec_osc, hw)
+static int sosc_clk_enable(struct clk_hw *hw)
+{
+	struct pic32_sec_osc *sosc = clkhw_to_sosc(hw);
+	u32 v;
+
+	/* enable SOSC */
+	pic32_syskey_unlock();
+	writel(sosc->enable_mask, PIC32_SET(sosc->enable_reg));
+
+	/* wait till warm-up period expires or ready-status is updated */
+	return readl_poll_timeout_atomic(sosc->status_reg, v,
+					 v & sosc->status_mask, 1, 100);
+}
+
+static void sosc_clk_disable(struct clk_hw *hw)
+{
+	struct pic32_sec_osc *sosc = clkhw_to_sosc(hw);
+
+	pic32_syskey_unlock();
+	writel(sosc->enable_mask, PIC32_CLR(sosc->enable_reg));
+}
+
+static int sosc_clk_is_enabled(struct clk_hw *hw)
+{
+	struct pic32_sec_osc *sosc = clkhw_to_sosc(hw);
+	u32 enabled, ready;
+
+	/* check enabled and ready status */
+	enabled = readl(sosc->enable_reg) & sosc->enable_mask;
+	ready = readl(sosc->status_reg) & sosc->status_mask;
+
+	return enabled && ready;
+}
+
+static unsigned long sosc_clk_calc_rate(struct clk_hw *hw,
+					unsigned long parent_rate)
+{
+	return clkhw_to_sosc(hw)->fixed_rate;
+}
+
+const struct clk_ops pic32_sosc_ops = {
+	.enable = sosc_clk_enable,
+	.disable = sosc_clk_disable,
+	.is_enabled = sosc_clk_is_enabled,
+	.recalc_rate = sosc_clk_calc_rate,
+};
+
+struct clk *pic32_sosc_clk_register(const struct pic32_sec_osc_data *data,
+				    struct pic32_clk_common *core)
+{
+	struct pic32_sec_osc *sosc;
+
+	sosc = devm_kzalloc(core->dev, sizeof(*sosc), GFP_KERNEL);
+	if (!sosc)
+		return ERR_PTR(-ENOMEM);
+
+	sosc->core = core;
+	sosc->hw.init = &data->init_data;
+	sosc->fixed_rate = data->fixed_rate;
+	sosc->enable_mask = data->enable_mask;
+	sosc->status_mask = data->status_mask;
+	sosc->enable_reg = data->enable_reg + core->iobase;
+	sosc->status_reg = data->status_reg + core->iobase;
+
+	return devm_clk_register(core->dev, &sosc->hw);
+}
diff --git a/drivers/clk/microchip/clk-core.h b/drivers/clk/microchip/clk-core.h
new file mode 100644
index 0000000..8566642
--- /dev/null
+++ b/drivers/clk/microchip/clk-core.h
@@ -0,0 +1,84 @@
+/*
+ * Purna Chandra Mandal,<purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef __MICROCHIP_CLK_PIC32_H_
+#define __MICROCHIP_CLK_PIC32_H_
+
+#include <linux/clk-provider.h>
+
+/* PIC32 clock data */
+struct pic32_clk_common {
+	struct device *dev;
+	void __iomem *iobase;
+	spinlock_t reg_lock; /* clock lock */
+};
+
+/* System PLL clock */
+struct pic32_sys_pll_data {
+	struct clk_init_data init_data;
+	const u32 ctrl_reg;
+	const u32 status_reg;
+	const u32 lock_mask;
+};
+
+/* System clock */
+struct pic32_sys_clk_data {
+	struct clk_init_data init_data;
+	const u32 mux_reg;
+	const u32 slew_reg;
+	const u32 *parent_map;
+	const u32 slew_div;
+};
+
+/* Reference Oscillator clock */
+struct pic32_ref_osc_data {
+	struct clk_init_data init_data;
+	const u32 ctrl_reg;
+	const u32 *parent_map;
+};
+
+/* Peripheral Bus clock */
+struct pic32_periph_clk_data {
+	struct clk_init_data init_data;
+	const u32 ctrl_reg;
+};
+
+/* External Secondary Oscillator clock  */
+struct pic32_sec_osc_data {
+	struct clk_init_data init_data;
+	const u32 enable_reg;
+	const u32 status_reg;
+	const u32 enable_mask;
+	const u32 status_mask;
+	const unsigned long fixed_rate;
+};
+
+extern const struct clk_ops pic32_pbclk_ops;
+extern const struct clk_ops pic32_sclk_ops;
+extern const struct clk_ops pic32_sclk_no_div_ops;
+extern const struct clk_ops pic32_spll_ops;
+extern const struct clk_ops pic32_roclk_ops;
+extern const struct clk_ops pic32_sosc_ops;
+
+struct clk *pic32_periph_clk_register(const struct pic32_periph_clk_data *data,
+				      struct pic32_clk_common *core);
+struct clk *pic32_refo_clk_register(const struct pic32_ref_osc_data *data,
+				    struct pic32_clk_common *core);
+struct clk *pic32_sys_clk_register(const struct pic32_sys_clk_data *data,
+				   struct pic32_clk_common *core);
+struct clk *pic32_spll_clk_register(const struct pic32_sys_pll_data *data,
+				    struct pic32_clk_common *core);
+struct clk *pic32_sosc_clk_register(const struct pic32_sec_osc_data *data,
+				    struct pic32_clk_common *core);
+
+#endif /* __MICROCHIP_CLK_PIC32_H_*/
diff --git a/drivers/clk/microchip/clk-pic32mzda.c b/drivers/clk/microchip/clk-pic32mzda.c
new file mode 100644
index 0000000..020a29a
--- /dev/null
+++ b/drivers/clk/microchip/clk-pic32mzda.c
@@ -0,0 +1,275 @@
+/*
+ * Purna Chandra Mandal,<purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#include <dt-bindings/clock/microchip,pic32-clock.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <asm/traps.h>
+
+#include "clk-core.h"
+
+/* FRC Postscaler */
+#define OSC_FRCDIV_MASK		0x07
+#define OSC_FRCDIV_SHIFT	24
+
+/* SPLL fields */
+#define PLL_ICLK_MASK		0x01
+#define PLL_ICLK_SHIFT		7
+
+#define DECLARE_PERIPHERAL_CLOCK(__clk_name, __reg, __flags)	\
+	{							\
+		.ctrl_reg = (__reg),				\
+		.init_data = {					\
+			.name = (__clk_name),			\
+			.parent_names = (const char *[]) {	\
+				"sys_clk"			\
+			},					\
+			.num_parents = 1,			\
+			.ops = &pic32_pbclk_ops,		\
+			.flags = (__flags),			\
+		},						\
+	}
+
+#define DECLARE_REFO_CLOCK(__clkid, __reg)				\
+	{								\
+		.ctrl_reg = (__reg),					\
+		.init_data = {						\
+			.name = "refo" #__clkid "_clk",			\
+			.parent_names = (const char *[]) {		\
+				"sys_clk", "pb1_clk", "posc_clk",	\
+				"frc_clk", "lprc_clk", "sosc_clk",	\
+				"sys_pll", "refi" #__clkid "_clk",	\
+				"bfrc_clk",				\
+			},						\
+			.num_parents = 9,				\
+			.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE,\
+			.ops = &pic32_roclk_ops,			\
+		},							\
+		.parent_map = (const u32[]) {				\
+			0, 1, 2, 3, 4, 5, 7, 8, 9			\
+		},							\
+	}
+
+static const struct pic32_ref_osc_data ref_clks[] = {
+	DECLARE_REFO_CLOCK(1, 0x80),
+	DECLARE_REFO_CLOCK(2, 0xa0),
+	DECLARE_REFO_CLOCK(3, 0xc0),
+	DECLARE_REFO_CLOCK(4, 0xe0),
+	DECLARE_REFO_CLOCK(5, 0x100),
+};
+
+static const struct pic32_periph_clk_data periph_clocks[] = {
+	DECLARE_PERIPHERAL_CLOCK("pb1_clk", 0x140, 0),
+	DECLARE_PERIPHERAL_CLOCK("pb2_clk", 0x150, CLK_IGNORE_UNUSED),
+	DECLARE_PERIPHERAL_CLOCK("pb3_clk", 0x160, 0),
+	DECLARE_PERIPHERAL_CLOCK("pb4_clk", 0x170, 0),
+	DECLARE_PERIPHERAL_CLOCK("pb5_clk", 0x180, 0),
+	DECLARE_PERIPHERAL_CLOCK("pb6_clk", 0x190, 0),
+	DECLARE_PERIPHERAL_CLOCK("cpu_clk", 0x1a0, CLK_IGNORE_UNUSED),
+};
+
+static const struct pic32_sys_clk_data sys_mux_clk = {
+	.slew_reg = 0x1c0,
+	.slew_div = 2, /* step of div_4 -> div_2 -> no_div */
+	.init_data = {
+		.name = "sys_clk",
+		.parent_names = (const char *[]) {
+			"frcdiv_clk", "sys_pll", "posc_clk",
+			"sosc_clk", "lprc_clk", "frcdiv_clk",
+		},
+		.num_parents = 6,
+		.ops = &pic32_sclk_ops,
+	},
+	.parent_map = (const u32[]) {
+		0, 1, 2, 4, 5, 7,
+	},
+};
+
+static const struct pic32_sys_pll_data sys_pll = {
+	.ctrl_reg = 0x020,
+	.status_reg = 0x1d0,
+	.lock_mask = BIT(7),
+	.init_data = {
+		.name = "sys_pll",
+		.parent_names = (const char *[]) {
+			"spll_mux_clk"
+		},
+		.num_parents = 1,
+		.ops = &pic32_spll_ops,
+	},
+};
+
+static const struct pic32_sec_osc_data sosc_clk = {
+	.status_reg = 0x1d0,
+	.enable_mask = BIT(1),
+	.status_mask = BIT(4),
+	.init_data = {
+		.name = "sosc_clk",
+		.parent_names = NULL,
+		.ops = &pic32_sosc_ops,
+	},
+};
+
+static const int pic32mzda_critical_clks[] = {	/* read-only index table */
+	PB2CLK, PB7CLK
+};
+
+/* PIC32MZDA clock data */
+struct pic32mzda_clk_data {
+	struct clk *clks[MAXCLKS];
+	struct pic32_clk_common core;
+	struct clk_onecell_data onecell_data;
+	struct notifier_block failsafe_notifier;
+};
+
+static int pic32_fscm_nmi(struct notifier_block *nb,
+			  unsigned long action, void *data)
+{
+	struct pic32mzda_clk_data *cd;
+
+	cd  = container_of(nb, struct pic32mzda_clk_data, failsafe_notifier);
+
+	/* SYSCLK is now running from BFRCCLK. Report clock failure. */
+	if (readl(cd->core.iobase) & BIT(2))
+		pr_alert("pic32-clk: FSCM detected clk failure.\n");
+
+	/* TODO: detect reason of failure and recover accordingly */
+
+	return NOTIFY_OK;
+}
+
+static int pic32mzda_clk_probe(struct platform_device *pdev)
+{
+	const char *const pll_mux_parents[] = {"posc_clk", "frc_clk"};
+	struct device_node *np = pdev->dev.of_node;
+	struct pic32mzda_clk_data *cd;
+	struct pic32_clk_common *core;
+	struct clk *pll_mux_clk, *clk;
+	struct clk **clks;
+	int nr_clks, i, ret;
+
+	cd = devm_kzalloc(&pdev->dev, sizeof(*cd), GFP_KERNEL);
+	if (!cd)
+		return -ENOMEM;
+
+	core = &cd->core;
+	core->iobase = of_io_request_and_map(np, 0, of_node_full_name(np));
+	if (IS_ERR(core->iobase)) {
+		dev_err(&pdev->dev, "pic32-clk: failed to map registers\n");
+		return PTR_ERR(core->iobase);
+	}
+
+	spin_lock_init(&core->reg_lock);
+	core->dev = &pdev->dev;
+	clks = &cd->clks[0];
+
+	/* register fixed rate clocks */
+	clks[POSCCLK] = clk_register_fixed_rate(&pdev->dev, "posc_clk", NULL,
+						CLK_IS_ROOT, 24000000);
+	clks[FRCCLK] =  clk_register_fixed_rate(&pdev->dev, "frc_clk", NULL,
+						CLK_IS_ROOT, 8000000);
+	clks[BFRCCLK] = clk_register_fixed_rate(&pdev->dev, "bfrc_clk", NULL,
+						CLK_IS_ROOT, 8000000);
+	clks[LPRCCLK] = clk_register_fixed_rate(&pdev->dev, "lprc_clk", NULL,
+						CLK_IS_ROOT, 32000);
+	clks[UPLLCLK] = clk_register_fixed_rate(&pdev->dev, "usbphy_clk", NULL,
+						CLK_IS_ROOT, 24000000);
+	/* fixed rate (optional) clock */
+	if (of_find_property(np, "microchip,pic32mzda-sosc", NULL)) {
+		pr_info("pic32-clk: dt requests SOSC.\n");
+		clks[SOSCCLK] = pic32_sosc_clk_register(&sosc_clk, core);
+	}
+	/* divider clock */
+	clks[FRCDIVCLK] = clk_register_divider(&pdev->dev, "frcdiv_clk",
+					       "frc_clk", 0,
+					       core->iobase,
+					       OSC_FRCDIV_SHIFT,
+					       OSC_FRCDIV_MASK,
+					       CLK_DIVIDER_POWER_OF_TWO,
+					       &core->reg_lock);
+	/* PLL ICLK mux */
+	pll_mux_clk = clk_register_mux(&pdev->dev, "spll_mux_clk",
+				       pll_mux_parents, 2, 0,
+				       core->iobase + 0x020,
+				       PLL_ICLK_SHIFT, 1, 0, &core->reg_lock);
+	if (IS_ERR(pll_mux_clk))
+		pr_err("spll_mux_clk: clk register failed\n");
+
+	/* PLL */
+	clks[PLLCLK] = pic32_spll_clk_register(&sys_pll, core);
+	/* SYSTEM clock */
+	clks[SCLK] = pic32_sys_clk_register(&sys_mux_clk, core);
+	/* Peripheral bus clocks */
+	for (nr_clks = PB1CLK, i = 0; nr_clks <= PB7CLK; i++, nr_clks++)
+		clks[nr_clks] = pic32_periph_clk_register(&periph_clocks[i],
+							  core);
+	/* Reference oscillator clock */
+	for (nr_clks = REF1CLK, i = 0; nr_clks <= REF5CLK; i++, nr_clks++)
+		clks[nr_clks] = pic32_refo_clk_register(&ref_clks[i], core);
+
+	/* register clkdev; skip slots that failed or were never populated */
+	for (i = 0; i < MAXCLKS; i++) {
+		if (IS_ERR_OR_NULL(clks[i]))
+			continue;
+		clk_register_clkdev(clks[i], NULL, __clk_get_name(clks[i]));
+	}
+
+	/* register clock provider */
+	cd->onecell_data.clks = clks;
+	cd->onecell_data.clk_num = MAXCLKS;
+	ret = of_clk_add_provider(np, of_clk_src_onecell_get,
+				  &cd->onecell_data);
+	if (ret)
+		return ret;
+
+	/* force enable critical clocks */
+	for (i = 0; i < ARRAY_SIZE(pic32mzda_critical_clks); i++) {
+		clk = clks[pic32mzda_critical_clks[i]];
+		if (clk_prepare_enable(clk))
+			dev_err(&pdev->dev, "clk_prepare_enable(%s) failed\n",
+				__clk_get_name(clk));
+	}
+
+	/* register NMI for failsafe clock monitor */
+	cd->failsafe_notifier.notifier_call = pic32_fscm_nmi;
+	return register_nmi_notifier(&cd->failsafe_notifier);
+}
+
+static const struct of_device_id pic32mzda_clk_match_table[] = {
+	{ .compatible = "microchip,pic32mzda-clk", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, pic32mzda_clk_match_table);
+
+static struct platform_driver pic32mzda_clk_driver = {
+	.probe		= pic32mzda_clk_probe,
+	.driver		= {
+		.name	= "clk-pic32mzda",
+		.of_match_table = pic32mzda_clk_match_table,
+	},
+};
+
+static int __init microchip_pic32mzda_clk_init(void)
+{
+	return platform_driver_register(&pic32mzda_clk_driver);
+}
+core_initcall(microchip_pic32mzda_clk_init);
+
+MODULE_DESCRIPTION("Microchip PIC32MZDA Clock Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:clk-pic32mzda");
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 6ff327a..47352d2 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -181,6 +181,16 @@
 	  This option enables support for Texas Instruments 32.768 Hz clocksource
 	  available on many OMAP-like platforms.
 
+config CLKSRC_NPS
+	bool "NPS400 clocksource driver" if COMPILE_TEST
+	depends on !PHYS_ADDR_T_64BIT
+	select CLKSRC_MMIO
+	select CLKSRC_OF if OF
+	help
+	  NPS400 clocksource support.
+	  Provides a 64-bit counter with an update rate of up to 1000 MHz.
+	  The counter is accessed via a pair of 32-bit memory-mapped registers.
+
 config CLKSRC_STM32
 	bool "Clocksource for STM32 SoCs" if !ARCH_STM32
 	depends on OF && ARM && (ARCH_STM32 || COMPILE_TEST)
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index b0a3c96..473974f 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -47,6 +47,7 @@
 obj-$(CONFIG_MTK_TIMER)		+= mtk_timer.o
 obj-$(CONFIG_CLKSRC_PISTACHIO)	+= time-pistachio.o
 obj-$(CONFIG_CLKSRC_TI_32K)	+= timer-ti-32k.o
+obj-$(CONFIG_CLKSRC_NPS)	+= timer-nps.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
 obj-$(CONFIG_ARM_GLOBAL_TIMER)		+= arm_global_timer.o
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5152b38..4814446 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -468,11 +468,11 @@
 	.mask	= CLOCKSOURCE_MASK(56),
 };
 
-static struct timecounter timecounter;
+static struct arch_timer_kvm_info arch_timer_kvm_info;
 
-struct timecounter *arch_timer_get_timecounter(void)
+struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
 {
-	return &timecounter;
+	return &arch_timer_kvm_info;
 }
 
 static void __init arch_counter_register(unsigned type)
@@ -500,7 +500,8 @@
 	clocksource_register_hz(&clocksource_counter, arch_timer_rate);
 	cyclecounter.mult = clocksource_counter.mult;
 	cyclecounter.shift = clocksource_counter.shift;
-	timecounter_init(&timecounter, &cyclecounter, start_count);
+	timecounter_init(&arch_timer_kvm_info.timecounter,
+			 &cyclecounter, start_count);
 
 	/* 56 bits minimum, so we assume worst case rollover */
 	sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
@@ -744,6 +745,8 @@
 
 	arch_timer_register();
 	arch_timer_common_init();
+
+	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI];
 }
 
 static void __init arch_timer_of_init(struct device_node *np)
diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c
new file mode 100644
index 0000000..d461089
--- /dev/null
+++ b/drivers/clocksource/timer-nps.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/cpu.h>
+#include <soc/nps/common.h>
+
+#define NPS_MSU_TICK_LOW	0xC8
+#define NPS_CLUSTER_OFFSET	8
+#define NPS_CLUSTER_NUM		16
+
+/* This array is per cluster of CPUs (each NPS400 cluster has 256 CPUs) */
+static void *nps_msu_reg_low_addr[NPS_CLUSTER_NUM] __read_mostly;
+
+static unsigned long nps_timer_rate;
+
+static cycle_t nps_clksrc_read(struct clocksource *clksrc)
+{
+	int cluster = raw_smp_processor_id() >> NPS_CLUSTER_OFFSET;
+
+	return (cycle_t)ioread32be(nps_msu_reg_low_addr[cluster]);
+}
+
+static void __init nps_setup_clocksource(struct device_node *node,
+					 struct clk *clk)
+{
+	int ret, cluster;
+
+	for (cluster = 0; cluster < NPS_CLUSTER_NUM; cluster++)
+		nps_msu_reg_low_addr[cluster] =
+			nps_host_reg((cluster << NPS_CLUSTER_OFFSET),
+				 NPS_MSU_BLKID, NPS_MSU_TICK_LOW);
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		pr_err("Couldn't enable parent clock\n");
+		return;
+	}
+
+	nps_timer_rate = clk_get_rate(clk);
+
+	ret = clocksource_mmio_init(nps_msu_reg_low_addr, "EZnps-tick",
+				    nps_timer_rate, 301, 32, nps_clksrc_read);
+	if (ret) {
+		pr_err("Couldn't register clock source.\n");
+		clk_disable_unprepare(clk);
+	}
+}
+
+static void __init nps_timer_init(struct device_node *node)
+{
+	struct clk *clk;
+
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk)) {
+		pr_err("Can't get timer clock.\n");
+		return;
+	}
+
+	nps_setup_clocksource(node, clk);
+}
+
+CLOCKSOURCE_OF_DECLARE(ezchip_nps400_clksrc, "ezchip,nps400-timer",
+		       nps_timer_init);
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index e1eb11e..0a9b6a09 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -102,7 +102,7 @@
 obj-$(CONFIG_ETRAXFS)			+= cris-etraxfs-cpufreq.o
 obj-$(CONFIG_IA64_ACPI_CPUFREQ)		+= ia64-acpi-cpufreq.o
 obj-$(CONFIG_LOONGSON2_CPUFREQ)		+= loongson2_cpufreq.o
-obj-$(CONFIG_LOONGSON1_CPUFREQ)		+= ls1x-cpufreq.o
+obj-$(CONFIG_LOONGSON1_CPUFREQ)		+= loongson1-cpufreq.o
 obj-$(CONFIG_SH_CPU_FREQ)		+= sh-cpufreq.o
 obj-$(CONFIG_SPARC_US2E_CPUFREQ)	+= sparc-us2e-cpufreq.o
 obj-$(CONFIG_SPARC_US3_CPUFREQ)		+= sparc-us3-cpufreq.o
diff --git a/drivers/cpufreq/ls1x-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c
similarity index 65%
rename from drivers/cpufreq/ls1x-cpufreq.c
rename to drivers/cpufreq/loongson1-cpufreq.c
index 262581b..be89416 100644
--- a/drivers/cpufreq/ls1x-cpufreq.c
+++ b/drivers/cpufreq/loongson1-cpufreq.c
@@ -1,7 +1,7 @@
 /*
  * CPU Frequency Scaling for Loongson 1 SoC
  *
- * Copyright (C) 2014 Zhang, Keguang <keguang.zhang@gmail.com>
+ * Copyright (C) 2014-2016 Zhang, Keguang <keguang.zhang@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -20,7 +20,7 @@
 #include <cpufreq.h>
 #include <loongson1.h>
 
-static struct {
+struct ls1x_cpufreq {
 	struct device *dev;
 	struct clk *clk;	/* CPU clk */
 	struct clk *mux_clk;	/* MUX of CPU clk */
@@ -28,7 +28,9 @@
 	struct clk *osc_clk;	/* OSC clk */
 	unsigned int max_freq;
 	unsigned int min_freq;
-} ls1x_cpufreq;
+};
+
+static struct ls1x_cpufreq *cpufreq;
 
 static int ls1x_cpufreq_notifier(struct notifier_block *nb,
 				 unsigned long val, void *data)
@@ -46,6 +48,7 @@
 static int ls1x_cpufreq_target(struct cpufreq_policy *policy,
 			       unsigned int index)
 {
+	struct device *cpu_dev = get_cpu_device(policy->cpu);
 	unsigned int old_freq, new_freq;
 
 	old_freq = policy->cur;
@@ -60,53 +63,49 @@
 	 *  - Reparent CPU clk back to CPU DIV clk
 	 */
 
-	dev_dbg(ls1x_cpufreq.dev, "%u KHz --> %u KHz\n", old_freq, new_freq);
-	clk_set_parent(policy->clk, ls1x_cpufreq.osc_clk);
+	clk_set_parent(policy->clk, cpufreq->osc_clk);
 	__raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) | RST_CPU_EN | RST_CPU,
 		     LS1X_CLK_PLL_DIV);
 	__raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) & ~(RST_CPU_EN | RST_CPU),
 		     LS1X_CLK_PLL_DIV);
-	clk_set_rate(ls1x_cpufreq.mux_clk, new_freq * 1000);
-	clk_set_parent(policy->clk, ls1x_cpufreq.mux_clk);
+	clk_set_rate(cpufreq->mux_clk, new_freq * 1000);
+	clk_set_parent(policy->clk, cpufreq->mux_clk);
+	dev_dbg(cpu_dev, "%u KHz --> %u KHz\n", old_freq, new_freq);
 
 	return 0;
 }
 
 static int ls1x_cpufreq_init(struct cpufreq_policy *policy)
 {
+	struct device *cpu_dev = get_cpu_device(policy->cpu);
 	struct cpufreq_frequency_table *freq_tbl;
 	unsigned int pll_freq, freq;
 	int steps, i, ret;
 
-	pll_freq = clk_get_rate(ls1x_cpufreq.pll_clk) / 1000;
+	pll_freq = clk_get_rate(cpufreq->pll_clk) / 1000;
 
 	steps = 1 << DIV_CPU_WIDTH;
-	freq_tbl = kzalloc(sizeof(*freq_tbl) * steps, GFP_KERNEL);
-	if (!freq_tbl) {
-		dev_err(ls1x_cpufreq.dev,
-			"failed to alloc cpufreq_frequency_table\n");
-		ret = -ENOMEM;
-		goto out;
-	}
+	freq_tbl = kcalloc(steps, sizeof(*freq_tbl), GFP_KERNEL);
+	if (!freq_tbl)
+		return -ENOMEM;
 
 	for (i = 0; i < (steps - 1); i++) {
 		freq = pll_freq / (i + 1);
-		if ((freq < ls1x_cpufreq.min_freq) ||
-		    (freq > ls1x_cpufreq.max_freq))
+		if ((freq < cpufreq->min_freq) || (freq > cpufreq->max_freq))
 			freq_tbl[i].frequency = CPUFREQ_ENTRY_INVALID;
 		else
 			freq_tbl[i].frequency = freq;
-		dev_dbg(ls1x_cpufreq.dev,
+		dev_dbg(cpu_dev,
 			"cpufreq table: index %d: frequency %d\n", i,
 			freq_tbl[i].frequency);
 	}
 	freq_tbl[i].frequency = CPUFREQ_TABLE_END;
 
-	policy->clk = ls1x_cpufreq.clk;
+	policy->clk = cpufreq->clk;
 	ret = cpufreq_generic_init(policy, freq_tbl, 0);
 	if (ret)
 		kfree(freq_tbl);
-out:
+
 	return ret;
 }
 
@@ -138,85 +137,86 @@
 
 static int ls1x_cpufreq_probe(struct platform_device *pdev)
 {
-	struct plat_ls1x_cpufreq *pdata = pdev->dev.platform_data;
+	struct plat_ls1x_cpufreq *pdata = dev_get_platdata(&pdev->dev);
 	struct clk *clk;
 	int ret;
 
-	if (!pdata || !pdata->clk_name || !pdata->osc_clk_name)
+	if (!pdata || !pdata->clk_name || !pdata->osc_clk_name) {
+		dev_err(&pdev->dev, "platform data missing\n");
 		return -EINVAL;
+	}
 
-	ls1x_cpufreq.dev = &pdev->dev;
+	cpufreq =
+	    devm_kzalloc(&pdev->dev, sizeof(struct ls1x_cpufreq), GFP_KERNEL);
+	if (!cpufreq)
+		return -ENOMEM;
+
+	cpufreq->dev = &pdev->dev;
 
 	clk = devm_clk_get(&pdev->dev, pdata->clk_name);
 	if (IS_ERR(clk)) {
-		dev_err(ls1x_cpufreq.dev, "unable to get %s clock\n",
+		dev_err(&pdev->dev, "unable to get %s clock\n",
 			pdata->clk_name);
-		ret = PTR_ERR(clk);
-		goto out;
+		return PTR_ERR(clk);
 	}
-	ls1x_cpufreq.clk = clk;
+	cpufreq->clk = clk;
 
 	clk = clk_get_parent(clk);
 	if (IS_ERR(clk)) {
-		dev_err(ls1x_cpufreq.dev, "unable to get parent of %s clock\n",
-			__clk_get_name(ls1x_cpufreq.clk));
-		ret = PTR_ERR(clk);
-		goto out;
+		dev_err(&pdev->dev, "unable to get parent of %s clock\n",
+			__clk_get_name(cpufreq->clk));
+		return PTR_ERR(clk);
 	}
-	ls1x_cpufreq.mux_clk = clk;
+	cpufreq->mux_clk = clk;
 
 	clk = clk_get_parent(clk);
 	if (IS_ERR(clk)) {
-		dev_err(ls1x_cpufreq.dev, "unable to get parent of %s clock\n",
-			__clk_get_name(ls1x_cpufreq.mux_clk));
-		ret = PTR_ERR(clk);
-		goto out;
+		dev_err(&pdev->dev, "unable to get parent of %s clock\n",
+			__clk_get_name(cpufreq->mux_clk));
+		return PTR_ERR(clk);
 	}
-	ls1x_cpufreq.pll_clk = clk;
+	cpufreq->pll_clk = clk;
 
 	clk = devm_clk_get(&pdev->dev, pdata->osc_clk_name);
 	if (IS_ERR(clk)) {
-		dev_err(ls1x_cpufreq.dev, "unable to get %s clock\n",
+		dev_err(&pdev->dev, "unable to get %s clock\n",
 			pdata->osc_clk_name);
-		ret = PTR_ERR(clk);
-		goto out;
+		return PTR_ERR(clk);
 	}
-	ls1x_cpufreq.osc_clk = clk;
+	cpufreq->osc_clk = clk;
 
-	ls1x_cpufreq.max_freq = pdata->max_freq;
-	ls1x_cpufreq.min_freq = pdata->min_freq;
+	cpufreq->max_freq = pdata->max_freq;
+	cpufreq->min_freq = pdata->min_freq;
 
 	ret = cpufreq_register_driver(&ls1x_cpufreq_driver);
 	if (ret) {
-		dev_err(ls1x_cpufreq.dev,
-			"failed to register cpufreq driver: %d\n", ret);
-		goto out;
+		dev_err(&pdev->dev,
+			"failed to register CPUFreq driver: %d\n", ret);
+		return ret;
 	}
 
 	ret = cpufreq_register_notifier(&ls1x_cpufreq_notifier_block,
 					CPUFREQ_TRANSITION_NOTIFIER);
 
-	if (!ret)
-		goto out;
+	if (ret) {
+		dev_err(&pdev->dev,
+			"failed to register CPUFreq notifier: %d\n", ret);
+		cpufreq_unregister_driver(&ls1x_cpufreq_driver);
+	}
 
-	dev_err(ls1x_cpufreq.dev, "failed to register cpufreq notifier: %d\n",
-		ret);
-
-	cpufreq_unregister_driver(&ls1x_cpufreq_driver);
-out:
 	return ret;
 }
 
 static struct platform_driver ls1x_cpufreq_platdrv = {
-	.driver = {
+	.probe	= ls1x_cpufreq_probe,
+	.remove	= ls1x_cpufreq_remove,
+	.driver	= {
 		.name	= "ls1x-cpufreq",
 	},
-	.probe		= ls1x_cpufreq_probe,
-	.remove		= ls1x_cpufreq_remove,
 };
 
 module_platform_driver(ls1x_cpufreq_platdrv);
 
 MODULE_AUTHOR("Kelvin Cheung <keguang.zhang@gmail.com>");
-MODULE_DESCRIPTION("Loongson 1 CPUFreq driver");
+MODULE_DESCRIPTION("Loongson1 CPUFreq driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d96d87c..8c98779 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -332,7 +332,7 @@
 
 config MV_XOR
 	bool "Marvell XOR engine support"
-	depends on PLAT_ORION
+	depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST
 	select DMA_ENGINE
 	select DMA_ENGINE_RAID
 	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
@@ -467,6 +467,20 @@
 	  This DMA controller transfers data from memory to peripheral fifo
 	  or vice versa. It does not support memory to memory data transfer.
 
+config TEGRA210_ADMA
+	bool "NVIDIA Tegra210 ADMA support"
+	depends on ARCH_TEGRA_210_SOC
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	select PM_CLK
+	help
+	  Support for the NVIDIA Tegra210 ADMA controller driver. The
+	  DMA controller has multiple DMA channels and is used to service
+	  various audio clients in the Tegra210 audio processing engine
+	  (APE). This DMA controller transfers data from memory to
+	  peripheral and vice versa. It does not support memory to
+	  memory data transfer.
+
 config TIMB_DMA
 	tristate "Timberdale FPGA DMA support"
 	depends on MFD_TIMBERDALE
@@ -507,7 +521,7 @@
 
 config XILINX_VDMA
 	tristate "Xilinx AXI VDMA Engine"
-	depends on (ARCH_ZYNQ || MICROBLAZE)
+	depends on (ARCH_ZYNQ || MICROBLAZE || ARM64)
 	select DMA_ENGINE
 	help
 	  Enable support for Xilinx AXI VDMA Soft IP.
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 6084127..614f28b 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -59,6 +59,7 @@
 obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
+obj-$(CONFIG_TEGRA210_ADMA) += tegra210-adma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 9b42c05..81db1c4 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -107,16 +107,20 @@
 /**
  * struct vendor_data - vendor-specific config parameters for PL08x derivatives
  * @channels: the number of channels available in this variant
+ * @signals: the number of request signals available from the hardware
  * @dualmaster: whether this version supports dual AHB masters or not.
  * @nomadik: whether the channels have Nomadik security extension bits
  *	that need to be checked for permission before use and some registers are
  *	missing
  * @pl080s: whether this version is a PL080S, which has separate register and
  *	LLI word for transfer size.
+ * @max_transfer_size: the maximum single element transfer size for this
+ *	PL08x variant.
  */
 struct vendor_data {
 	u8 config_offset;
 	u8 channels;
+	u8 signals;
 	bool dualmaster;
 	bool nomadik;
 	bool pl080s;
@@ -235,7 +239,7 @@
 	struct virt_dma_chan vc;
 	struct pl08x_phy_chan *phychan;
 	const char *name;
-	const struct pl08x_channel_data *cd;
+	struct pl08x_channel_data *cd;
 	struct dma_slave_config cfg;
 	struct pl08x_txd *at;
 	struct pl08x_driver_data *host;
@@ -1909,6 +1913,12 @@
 
 		if (slave) {
 			chan->cd = &pl08x->pd->slave_channels[i];
+			/*
+			 * Some implementations have muxed signals, whereas some
+			 * use a mux in front of the signals and need dynamic
+			 * assignment of signals.
+			 */
+			chan->signal = i;
 			pl08x_dma_slave_init(chan);
 		} else {
 			chan->cd = &pl08x->pd->memcpy_channel;
@@ -2050,40 +2060,33 @@
 				       struct of_dma *ofdma)
 {
 	struct pl08x_driver_data *pl08x = ofdma->of_dma_data;
-	struct pl08x_channel_data *data;
-	struct pl08x_dma_chan *chan;
 	struct dma_chan *dma_chan;
+	struct pl08x_dma_chan *plchan;
 
 	if (!pl08x)
 		return NULL;
 
-	if (dma_spec->args_count != 2)
+	if (dma_spec->args_count != 2) {
+		dev_err(&pl08x->adev->dev,
+			"DMA channel translation requires two cells\n");
 		return NULL;
+	}
 
 	dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]);
-	if (dma_chan)
-		return dma_get_slave_channel(dma_chan);
-
-	chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data),
-			    GFP_KERNEL);
-	if (!chan)
+	if (!dma_chan) {
+		dev_err(&pl08x->adev->dev,
+			"DMA slave channel not found\n");
 		return NULL;
+	}
 
-	data = (void *)&chan[1];
-	data->bus_id = "(none)";
-	data->periph_buses = dma_spec->args[1];
+	plchan = to_pl08x_chan(dma_chan);
+	dev_dbg(&pl08x->adev->dev,
+		"translated channel for signal %d\n",
+		dma_spec->args[0]);
 
-	chan->cd = data;
-	chan->host = pl08x;
-	chan->slave = true;
-	chan->name = data->bus_id;
-	chan->state = PL08X_CHAN_IDLE;
-	chan->signal = dma_spec->args[0];
-	chan->vc.desc_free = pl08x_desc_free;
-
-	vchan_init(&chan->vc, &pl08x->slave);
-
-	return dma_get_slave_channel(&chan->vc.chan);
+	/* Augment channel data for applicable AHB buses */
+	plchan->cd->periph_buses = dma_spec->args[1];
+	return dma_get_slave_channel(dma_chan);
 }
 
 static int pl08x_of_probe(struct amba_device *adev,
@@ -2091,9 +2094,11 @@
 			  struct device_node *np)
 {
 	struct pl08x_platform_data *pd;
+	struct pl08x_channel_data *chanp = NULL;
 	u32 cctl_memcpy = 0;
 	u32 val;
 	int ret;
+	int i;
 
 	pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL);
 	if (!pd)
@@ -2195,6 +2200,27 @@
 	/* Use the buses that can access memory, obviously */
 	pd->memcpy_channel.periph_buses = pd->mem_buses;
 
+	/*
+	 * Allocate channel data for every possible slave channel (one per
+	 * request signal). A channel is later handed to a device and has
+	 * its AHB interfaces set up at translation time. Names are
+	 * device-managed so they are released together with the array.
+	 */
+	chanp = devm_kcalloc(&adev->dev, pl08x->vd->signals,
+			     sizeof(*chanp), GFP_KERNEL);
+	if (!chanp)
+		return -ENOMEM;
+
+	pd->slave_channels = chanp;
+	for (i = 0; i < pl08x->vd->signals; i++, chanp++) {
+		/* chanp->periph_buses will be assigned at translation */
+		chanp->bus_id = devm_kasprintf(&adev->dev, GFP_KERNEL,
+					       "slave%d", i);
+		if (!chanp->bus_id)
+			return -ENOMEM;
+	}
+	pd->num_slave_channels = pl08x->vd->signals;
+
 	pl08x->pd = pd;
 
 	return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate,
@@ -2234,6 +2260,10 @@
 		goto out_no_pl08x;
 	}
 
+	/* Assign useful pointers to the driver state */
+	pl08x->adev = adev;
+	pl08x->vd = vd;
+
 	/* Initialize memcpy engine */
 	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
 	pl08x->memcpy.dev = &adev->dev;
@@ -2284,10 +2314,6 @@
 		}
 	}
 
-	/* Assign useful pointers to the driver state */
-	pl08x->adev = adev;
-	pl08x->vd = vd;
-
 	/* By default, AHB1 only.  If dualmaster, from platform */
 	pl08x->lli_buses = PL08X_AHB1;
 	pl08x->mem_buses = PL08X_AHB1;
@@ -2438,6 +2464,7 @@
 static struct vendor_data vendor_pl080 = {
 	.config_offset = PL080_CH_CONFIG,
 	.channels = 8,
+	.signals = 16,
 	.dualmaster = true,
 	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
@@ -2445,6 +2472,7 @@
 static struct vendor_data vendor_nomadik = {
 	.config_offset = PL080_CH_CONFIG,
 	.channels = 8,
+	.signals = 32,
 	.dualmaster = true,
 	.nomadik = true,
 	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
@@ -2453,6 +2481,7 @@
 static struct vendor_data vendor_pl080s = {
 	.config_offset = PL080S_CH_CONFIG,
 	.channels = 8,
+	.signals = 32,
 	.pl080s = true,
 	.max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK,
 };
@@ -2460,6 +2489,7 @@
 static struct vendor_data vendor_pl081 = {
 	.config_offset = PL080_CH_CONFIG,
 	.channels = 2,
+	.signals = 16,
 	.dualmaster = false,
 	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index 996c4b0..6149b27 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -46,6 +46,9 @@
 
 #include "virt-dma.h"
 
+#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14
+#define BCM2835_DMA_CHAN_NAME_SIZE 8
+
 struct bcm2835_dmadev {
 	struct dma_device ddev;
 	spinlock_t lock;
@@ -73,7 +76,6 @@
 	struct list_head node;
 
 	struct dma_slave_config	cfg;
-	bool cyclic;
 	unsigned int dreq;
 
 	int ch;
@@ -82,6 +84,9 @@
 
 	void __iomem *chan_base;
 	int irq_number;
+	unsigned int irq_flags;
+
+	bool is_lite_channel;
 };
 
 struct bcm2835_desc {
@@ -89,47 +94,104 @@
 	struct virt_dma_desc vd;
 	enum dma_transfer_direction dir;
 
-	struct bcm2835_cb_entry *cb_list;
-
 	unsigned int frames;
 	size_t size;
+
+	bool cyclic;
+
+	struct bcm2835_cb_entry cb_list[];
 };
 
 #define BCM2835_DMA_CS		0x00
 #define BCM2835_DMA_ADDR	0x04
+#define BCM2835_DMA_TI		0x08
 #define BCM2835_DMA_SOURCE_AD	0x0c
 #define BCM2835_DMA_DEST_AD	0x10
-#define BCM2835_DMA_NEXTCB	0x1C
+#define BCM2835_DMA_LEN		0x14
+#define BCM2835_DMA_STRIDE	0x18
+#define BCM2835_DMA_NEXTCB	0x1c
+#define BCM2835_DMA_DEBUG	0x20
 
 /* DMA CS Control and Status bits */
-#define BCM2835_DMA_ACTIVE	BIT(0)
-#define BCM2835_DMA_INT	BIT(2)
+#define BCM2835_DMA_ACTIVE	BIT(0)  /* activate the DMA */
+#define BCM2835_DMA_END		BIT(1)  /* current CB has ended */
+#define BCM2835_DMA_INT		BIT(2)  /* interrupt status */
+#define BCM2835_DMA_DREQ	BIT(3)  /* DREQ state */
 #define BCM2835_DMA_ISPAUSED	BIT(4)  /* Pause requested or not active */
 #define BCM2835_DMA_ISHELD	BIT(5)  /* Is held by DREQ flow control */
-#define BCM2835_DMA_ERR	BIT(8)
+#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last
+					       * AXI-write to ack
+					       */
+#define BCM2835_DMA_ERR		BIT(8)
+#define BCM2835_DMA_PRIORITY(x) (((x) & 15) << 16) /* AXI priority */
+#define BCM2835_DMA_PANIC_PRIORITY(x) (((x) & 15) << 20) /* panic priority */
+/* current value of TI.BCM2835_DMA_WAIT_RESP */
+#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28)
+#define BCM2835_DMA_DIS_DEBUG	BIT(29) /* disable debug pause signal */
 #define BCM2835_DMA_ABORT	BIT(30) /* Stop current CB, go to next, WO */
 #define BCM2835_DMA_RESET	BIT(31) /* WO, self clearing */
 
+/* Transfer information bits - also bcm2835_cb.info field */
 #define BCM2835_DMA_INT_EN	BIT(0)
+#define BCM2835_DMA_TDMODE	BIT(1) /* 2D-Mode */
+#define BCM2835_DMA_WAIT_RESP	BIT(3) /* wait for AXI-write to be acked */
 #define BCM2835_DMA_D_INC	BIT(4)
-#define BCM2835_DMA_D_DREQ	BIT(6)
+#define BCM2835_DMA_D_WIDTH	BIT(5) /* 128bit writes if set */
+#define BCM2835_DMA_D_DREQ	BIT(6) /* enable DREQ for destination */
+#define BCM2835_DMA_D_IGNORE	BIT(7) /* ignore destination writes */
 #define BCM2835_DMA_S_INC	BIT(8)
-#define BCM2835_DMA_S_DREQ	BIT(10)
+#define BCM2835_DMA_S_WIDTH	BIT(9) /* 128bit reads if set */
+#define BCM2835_DMA_S_DREQ	BIT(10) /* enable DREQ for source */
+#define BCM2835_DMA_S_IGNORE	BIT(11) /* ignore source reads - read 0 */
+#define BCM2835_DMA_BURST_LENGTH(x) (((x) & 15) << 12)
+#define BCM2835_DMA_PER_MAP(x)	(((x) & 31) << 16) /* REQ source */
+#define BCM2835_DMA_WAIT(x)	(((x) & 31) << 21) /* add DMA-wait cycles */
+#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */
 
-#define BCM2835_DMA_PER_MAP(x)	((x) << 16)
+/* debug register bits */
+#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR	BIT(0)
+#define BCM2835_DMA_DEBUG_FIFO_ERR		BIT(1)
+#define BCM2835_DMA_DEBUG_READ_ERR		BIT(2)
+#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4
+#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4
+#define BCM2835_DMA_DEBUG_ID_SHIFT		8
+#define BCM2835_DMA_DEBUG_ID_BITS		8
+#define BCM2835_DMA_DEBUG_STATE_SHIFT		16
+#define BCM2835_DMA_DEBUG_STATE_BITS		9
+#define BCM2835_DMA_DEBUG_VERSION_SHIFT		25
+#define BCM2835_DMA_DEBUG_VERSION_BITS		3
+#define BCM2835_DMA_DEBUG_LITE			BIT(28)
+
+/* shared registers for all dma channels */
+#define BCM2835_DMA_INT_STATUS         0xfe0
+#define BCM2835_DMA_ENABLE             0xff0
 
 #define BCM2835_DMA_DATA_TYPE_S8	1
 #define BCM2835_DMA_DATA_TYPE_S16	2
 #define BCM2835_DMA_DATA_TYPE_S32	4
 #define BCM2835_DMA_DATA_TYPE_S128	16
 
-#define BCM2835_DMA_BULK_MASK	BIT(0)
-#define BCM2835_DMA_FIQ_MASK	(BIT(2) | BIT(3))
-
 /* Valid only for channels 0 - 14, 15 has its own base address */
 #define BCM2835_DMA_CHAN(n)	((n) << 8) /* Base address */
 #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n))
 
+/* the max dma length for different channels */
+#define MAX_DMA_LEN SZ_1G
+#define MAX_LITE_DMA_LEN (SZ_64K - 4)
+
+static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
+{
+	/* lite and normal channels have different max frame length */
+	return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN;
+}
+
+/* how many frames of max_len size do we need to transfer len bytes */
+static inline size_t bcm2835_dma_frames_for_length(size_t len,
+						   size_t max_len)
+{
+	return DIV_ROUND_UP(len, max_len);
+}
+
 static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d)
 {
 	return container_of(d, struct bcm2835_dmadev, ddev);
@@ -146,19 +208,209 @@
 	return container_of(t, struct bcm2835_desc, vd.tx);
 }
 
-static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
+static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc)
 {
-	struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd);
-	int i;
+	size_t i;
 
 	for (i = 0; i < desc->frames; i++)
 		dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb,
 			      desc->cb_list[i].paddr);
 
-	kfree(desc->cb_list);
 	kfree(desc);
 }
 
+static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
+{
+	bcm2835_dma_free_cb_chain(
+		container_of(vd, struct bcm2835_desc, vd));
+}
+
+/*
+ * Set the length of @control_block to at most @len, capped by the maximum
+ * frame length of @chan. With a non-zero @period_len the block is also cut
+ * at the period boundary, where @finalextrainfo is ORed into its info and
+ * @total_len (bytes already queued in the current period) restarts at 0.
+ */
+static void bcm2835_dma_create_cb_set_length(
+	struct bcm2835_chan *chan,
+	struct bcm2835_dma_cb *control_block,
+	size_t len,
+	size_t period_len,
+	size_t *total_len,
+	u32 finalextrainfo)
+{
+	size_t max_len = bcm2835_dma_max_frame_length(chan);
+
+	/* set the length taking lite-channel limitations into account */
+	control_block->length = min_t(u32, len, max_len);
+
+	/* finished if we have no period_length */
+	if (!period_len)
+		return;
+
+	/* period not filled yet: account for this chunk and carry on */
+	if (*total_len + control_block->length < period_len) {
+		*total_len += control_block->length;
+		return;
+	}
+
+	/* calculate the length that remains to reach period_length */
+	control_block->length = period_len - *total_len;
+
+	/* reset total_length for next period */
+	*total_len = 0;
+
+	/* add extrainfo bits in info */
+	control_block->info |= finalextrainfo;
+}
+
+static inline size_t bcm2835_dma_count_frames_for_sg(
+	struct bcm2835_chan *c,
+	struct scatterlist *sgl,
+	unsigned int sg_len)
+{
+	size_t frames = 0;
+	struct scatterlist *sgent;
+	unsigned int i;
+	size_t plength = bcm2835_dma_max_frame_length(c);
+
+	for_each_sg(sgl, sgent, sg_len, i)
+		frames += bcm2835_dma_frames_for_length(
+			sg_dma_len(sgent), plength);
+
+	return frames;
+}
+
+/**
+ * bcm2835_dma_create_cb_chain - create a control block chain and fill data in
+ *
+ * @chan:           the @dma_chan for which we run this
+ * @direction:      the direction in which we transfer
+ * @cyclic:         it is a cyclic transfer
+ * @info:           the default info bits to apply per controlblock
+ * @frames:         number of controlblocks to allocate
+ * @src:            the src address to assign (if the S_INC bit is set
+ *                  in @info, then it gets incremented)
+ * @dst:            the dst address to assign (if the D_INC bit is set
+ *                  in @info, then it gets incremented)
+ * @buf_len:        the full buffer length (may also be 0)
+ * @period_len:     the period length when to apply @finalextrainfo
+ *                  in addition to the last transfer
+ *                  this will also break some control-blocks early
+ * @finalextrainfo: additional bits in last controlblock
+ *                  (or when period_len is reached in case of cyclic)
+ * @gfp:            the GFP flag to use for allocation
+ */
+static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
+	struct dma_chan *chan, enum dma_transfer_direction direction,
+	bool cyclic, u32 info, u32 finalextrainfo, size_t frames,
+	dma_addr_t src, dma_addr_t dst, size_t buf_len,
+	size_t period_len, gfp_t gfp)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	size_t len = buf_len, total_len;
+	size_t frame;
+	struct bcm2835_desc *d;
+	struct bcm2835_cb_entry *cb_entry;
+	struct bcm2835_dma_cb *control_block;
+
+	if (!frames)
+		return NULL;
+
+	/* allocate and setup the descriptor. */
+	d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry),
+		    gfp);
+	if (!d)
+		return NULL;
+
+	d->c = c;
+	d->dir = direction;
+	d->cyclic = cyclic;
+
+	/*
+	 * Iterate over all frames, create a control block
+	 * for each frame and link them together.
+	 */
+	for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) {
+		cb_entry = &d->cb_list[frame];
+		cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp,
+					      &cb_entry->paddr);
+		if (!cb_entry->cb)
+			goto error_cb;
+
+		/* fill in the control block */
+		control_block = cb_entry->cb;
+		control_block->info = info;
+		control_block->src = src;
+		control_block->dst = dst;
+		control_block->stride = 0;
+		control_block->next = 0;
+		/* set up length in control_block if requested */
+		if (buf_len) {
+			/* calculate length honoring period_length */
+			bcm2835_dma_create_cb_set_length(
+				c, control_block,
+				len, period_len, &total_len,
+				cyclic ? finalextrainfo : 0);
+
+			/* calculate new remaining length */
+			len -= control_block->length;
+		}
+
+		/* link this to the previous controlblock */
+		if (frame)
+			d->cb_list[frame - 1].cb->next = cb_entry->paddr;
+
+		/* update src and dst and length */
+		if (src && (info & BCM2835_DMA_S_INC))
+			src += control_block->length;
+		if (dst && (info & BCM2835_DMA_D_INC))
+			dst += control_block->length;
+
+		/* Length of total transfer */
+		d->size += control_block->length;
+	}
+
+	/* the last frame requires extra flags */
+	d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
+
+	/* detect a size mismatch */
+	if (buf_len && (d->size != buf_len))
+		goto error_cb;
+
+	return d;
+error_cb:
+	bcm2835_dma_free_cb_chain(d);
+
+	return NULL;
+}
+
+/* fill the control blocks at @cb from the entries of scatterlist @sgl */
+static void bcm2835_dma_fill_cb_chain_with_sg(
+	struct dma_chan *chan, enum dma_transfer_direction direction,
+	struct bcm2835_cb_entry *cb, struct scatterlist *sgl,
+	unsigned int sg_len)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	size_t len, max_len = bcm2835_dma_max_frame_length(c);
+	struct scatterlist *sgent;
+	dma_addr_t addr;
+	unsigned int i;
+
+	for_each_sg(sgl, sgent, sg_len, i) {
+		for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent);
+		     len > 0;
+		     addr += cb->cb->length, len -= cb->cb->length, cb++) {
+			if (direction == DMA_DEV_TO_MEM)
+				cb->cb->dst = addr;
+			else
+				cb->cb->src = addr;
+			/* an entry above max_len spills into the next cb */
+			cb->cb->length = min(len, max_len);
+		}
+	}
+}
+
 static int bcm2835_dma_abort(void __iomem *chan_base)
 {
 	unsigned long cs;
@@ -218,6 +470,15 @@
 	struct bcm2835_desc *d;
 	unsigned long flags;
 
+	/* check the shared interrupt */
+	if (c->irq_flags & IRQF_SHARED) {
+		/* check if the interrupt is enabled */
+		flags = readl(c->chan_base + BCM2835_DMA_CS);
+		/* if not set then we are not the reason for the irq */
+		if (!(flags & BCM2835_DMA_INT))
+			return IRQ_NONE;
+	}
+
 	spin_lock_irqsave(&c->vc.lock, flags);
 
 	/* Acknowledge interrupt */
@@ -226,12 +487,18 @@
 	d = c->desc;
 
 	if (d) {
-		/* TODO Only works for cyclic DMA */
-		vchan_cyclic_callback(&d->vd);
-	}
+		if (d->cyclic) {
+			/* call the cyclic callback */
+			vchan_cyclic_callback(&d->vd);
 
-	/* Keep the DMA engine running */
-	writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS);
+			/* Keep the DMA engine running */
+			writel(BCM2835_DMA_ACTIVE,
+			       c->chan_base + BCM2835_DMA_CS);
+		} else {
+			vchan_cookie_complete(&c->desc->vd);
+			bcm2835_dma_start_desc(c);
+		}
+	}
 
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 
@@ -252,8 +519,8 @@
 		return -ENOMEM;
 	}
 
-	return request_irq(c->irq_number,
-			bcm2835_dma_callback, 0, "DMA IRQ", c);
+	return request_irq(c->irq_number, bcm2835_dma_callback,
+			   c->irq_flags, "DMA IRQ", c);
 }
 
 static void bcm2835_dma_free_chan_resources(struct dma_chan *chan)
@@ -339,8 +606,6 @@
 	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
 	unsigned long flags;
 
-	c->cyclic = true; /* Nothing else is implemented */
-
 	spin_lock_irqsave(&c->vc.lock, flags);
 	if (vchan_issue_pending(&c->vc) && !c->desc)
 		bcm2835_dma_start_desc(c);
@@ -348,18 +613,98 @@
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 }
 
+/* prepare a memory-to-memory copy of @len bytes from @src to @dst */
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct bcm2835_desc *d;
+	u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC;
+	u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP;
+	size_t frames, max_len = bcm2835_dma_max_frame_length(c);
+
+	/* if src, dst or len is not given return with an error */
+	if (!src || !dst || !len)
+		return NULL;
+
+	/* split the copy into frames of at most max_len bytes */
+	frames = bcm2835_dma_frames_for_length(len, max_len);
+
+	/* allocate the CB chain - must not sleep, so use GFP_NOWAIT */
+	d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false,
+					info, extra, frames,
+					src, dst, len, 0, GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+/* prepare a slave transfer between the configured device address and @sgl */
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
+	struct dma_chan *chan,
+	struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct bcm2835_desc *d;
+	dma_addr_t src = 0, dst = 0;
+	u32 info = BCM2835_DMA_WAIT_RESP;
+	u32 extra = BCM2835_DMA_INT_EN;
+	size_t frames;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan->device->dev,
+			"%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (c->dreq != 0)
+		info |= BCM2835_DMA_PER_MAP(c->dreq);
+
+	if (direction == DMA_DEV_TO_MEM) {
+		if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		src = c->cfg.src_addr;
+		info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
+	} else {
+		if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		dst = c->cfg.dst_addr;
+		info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
+	}
+
+	/* count frames in sg list */
+	frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len);
+
+	/* allocate the CB chain - must not sleep, so use GFP_NOWAIT */
+	d = bcm2835_dma_create_cb_chain(chan, direction, false,
+					info, extra, frames,
+					src, dst, 0, 0, GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	/* fill in frames with scatterlist pointers */
+	bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list,
+					  sgl, sg_len);
+
+	return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
 static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
 	unsigned long flags)
 {
 	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
-	enum dma_slave_buswidth dev_width;
 	struct bcm2835_desc *d;
-	dma_addr_t dev_addr;
-	unsigned int es, sync_type;
-	unsigned int frame;
-	int i;
+	dma_addr_t src, dst;
+	u32 info = BCM2835_DMA_WAIT_RESP;
+	u32 extra = BCM2835_DMA_INT_EN;
+	size_t max_len = bcm2835_dma_max_frame_length(c);
+	size_t frames;
 
 	/* Grab configuration */
 	if (!is_slave_direction(direction)) {
@@ -367,103 +712,61 @@
 		return NULL;
 	}
 
-	if (direction == DMA_DEV_TO_MEM) {
-		dev_addr = c->cfg.src_addr;
-		dev_width = c->cfg.src_addr_width;
-		sync_type = BCM2835_DMA_S_DREQ;
-	} else {
-		dev_addr = c->cfg.dst_addr;
-		dev_width = c->cfg.dst_addr_width;
-		sync_type = BCM2835_DMA_D_DREQ;
-	}
-
-	/* Bus width translates to the element size (ES) */
-	switch (dev_width) {
-	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-		es = BCM2835_DMA_DATA_TYPE_S32;
-		break;
-	default:
+	if (!buf_len) {
+		dev_err(chan->device->dev,
+			"%s: bad buffer length (= 0)\n", __func__);
 		return NULL;
 	}
 
-	/* Now allocate and setup the descriptor. */
-	d = kzalloc(sizeof(*d), GFP_NOWAIT);
-	if (!d)
-		return NULL;
-
-	d->c = c;
-	d->dir = direction;
-	d->frames = buf_len / period_len;
-
-	d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL);
-	if (!d->cb_list) {
-		kfree(d);
-		return NULL;
-	}
-	/* Allocate memory for control blocks */
-	for (i = 0; i < d->frames; i++) {
-		struct bcm2835_cb_entry *cb_entry = &d->cb_list[i];
-
-		cb_entry->cb = dma_pool_zalloc(c->cb_pool, GFP_ATOMIC,
-					       &cb_entry->paddr);
-		if (!cb_entry->cb)
-			goto error_cb;
-	}
-
 	/*
-	 * Iterate over all frames, create a control block
-	 * for each frame and link them together.
+	 * warn if buf_len is not a multiple of period_len - this may lead
+	 * to unexpected latencies for interrupts and thus audible clicks
 	 */
-	for (frame = 0; frame < d->frames; frame++) {
-		struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb;
+	if (buf_len % period_len)
+		dev_warn_once(chan->device->dev,
+			      "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n",
+			      __func__, buf_len, period_len);
 
-		/* Setup adresses */
-		if (d->dir == DMA_DEV_TO_MEM) {
-			control_block->info = BCM2835_DMA_D_INC;
-			control_block->src = dev_addr;
-			control_block->dst = buf_addr + frame * period_len;
-		} else {
-			control_block->info = BCM2835_DMA_S_INC;
-			control_block->src = buf_addr + frame * period_len;
-			control_block->dst = dev_addr;
-		}
+	/* Setup DREQ channel */
+	if (c->dreq != 0)
+		info |= BCM2835_DMA_PER_MAP(c->dreq);
 
-		/* Enable interrupt */
-		control_block->info |= BCM2835_DMA_INT_EN;
-
-		/* Setup synchronization */
-		if (sync_type != 0)
-			control_block->info |= sync_type;
-
-		/* Setup DREQ channel */
-		if (c->dreq != 0)
-			control_block->info |=
-				BCM2835_DMA_PER_MAP(c->dreq);
-
-		/* Length of a frame */
-		control_block->length = period_len;
-		d->size += control_block->length;
-
-		/*
-		 * Next block is the next frame.
-		 * This DMA engine driver currently only supports cyclic DMA.
-		 * Therefore, wrap around at number of frames.
-		 */
-		control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr;
+	if (direction == DMA_DEV_TO_MEM) {
+		if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		src = c->cfg.src_addr;
+		dst = buf_addr;
+		info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
+	} else {
+		if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		dst = c->cfg.dst_addr;
+		src = buf_addr;
+		info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
 	}
 
+	/* calculate number of frames */
+	frames = /* number of periods */
+		 DIV_ROUND_UP(buf_len, period_len) *
+		 /* number of frames per period */
+		 bcm2835_dma_frames_for_length(period_len, max_len);
+
+	/*
+	 * allocate the CB chain
+	 * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine
+	 * implementation calls prep_dma_cyclic with interrupts disabled.
+	 */
+	d = bcm2835_dma_create_cb_chain(chan, direction, true,
+					info, extra,
+					frames, src, dst, buf_len,
+					period_len, GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	/* wrap around into a loop */
+	d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
+
 	return vchan_tx_prep(&c->vc, &d->vd, flags);
-error_cb:
-	i--;
-	for (; i >= 0; i--) {
-		struct bcm2835_cb_entry *cb_entry = &d->cb_list[i];
-
-		dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr);
-	}
-
-	kfree(d->cb_list);
-	kfree(d);
-	return NULL;
 }
 
 static int bcm2835_dma_slave_config(struct dma_chan *chan,
@@ -529,7 +832,8 @@
 	return 0;
 }
 
-static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq)
+static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id,
+				 int irq, unsigned int irq_flags)
 {
 	struct bcm2835_chan *c;
 
@@ -544,6 +848,12 @@
 	c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id);
 	c->ch = chan_id;
 	c->irq_number = irq;
+	c->irq_flags = irq_flags;
+
+	/* check in DEBUG register if this is a LITE channel */
+	if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
+		BCM2835_DMA_DEBUG_LITE)
+		c->is_lite_channel = true;
 
 	return 0;
 }
@@ -587,9 +897,11 @@
 	struct resource *res;
 	void __iomem *base;
 	int rc;
-	int i;
-	int irq;
+	int i, j;
+	int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1];
+	int irq_flags;
 	uint32_t chans_available;
+	char chan_name[BCM2835_DMA_CHAN_NAME_SIZE];
 
 	if (!pdev->dev.dma_mask)
 		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
@@ -615,16 +927,22 @@
 	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
 	dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask);
 	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
+	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
+	dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask);
 	od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources;
 	od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources;
 	od->ddev.device_tx_status = bcm2835_dma_tx_status;
 	od->ddev.device_issue_pending = bcm2835_dma_issue_pending;
 	od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic;
+	od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg;
+	od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy;
 	od->ddev.device_config = bcm2835_dma_slave_config;
 	od->ddev.device_terminate_all = bcm2835_dma_terminate_all;
 	od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
-	od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+			      BIT(DMA_MEM_TO_MEM);
+	od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	od->ddev.dev = &pdev->dev;
 	INIT_LIST_HEAD(&od->ddev.channels);
 	spin_lock_init(&od->lock);
@@ -640,22 +958,48 @@
 		goto err_no_dma;
 	}
 
-	/*
-	 * Do not use the FIQ and BULK channels,
-	 * because they are used by the GPU.
-	 */
-	chans_available &= ~(BCM2835_DMA_FIQ_MASK | BCM2835_DMA_BULK_MASK);
-
-	for (i = 0; i < pdev->num_resources; i++) {
-		irq = platform_get_irq(pdev, i);
-		if (irq < 0)
-			break;
-
-		if (chans_available & (1 << i)) {
-			rc = bcm2835_dma_chan_init(od, i, irq);
-			if (rc)
-				goto err_no_dma;
+	/* get irqs for each channel that we support */
+	for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+		/* skip masked out channels */
+		if (!(chans_available & (1 << i))) {
+			irq[i] = -1;
+			continue;
 		}
+
+		/* get the named irq */
+		snprintf(chan_name, sizeof(chan_name), "dma%i", i);
+		irq[i] = platform_get_irq_byname(pdev, chan_name);
+		if (irq[i] >= 0)
+			continue;
+
+		/* legacy device tree case handling */
+		dev_warn_once(&pdev->dev,
+			      "missing interrupt-names property in device tree - legacy interpretation is used\n");
+		/*
+		 * in case of channel >= 11
+		 * use the 11th interrupt and that is shared
+		 */
+		irq[i] = platform_get_irq(pdev, i < 11 ? i : 11);
+	}
+
+	/* get irqs for each channel */
+	for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+		/* skip channels without irq */
+		if (irq[i] < 0)
+			continue;
+
+		/* check if there are other channels that also use this irq */
+		irq_flags = 0;
+		for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++)
+			if ((i != j) && (irq[j] == irq[i])) {
+				irq_flags = IRQF_SHARED;
+				break;
+			}
+
+		/* initialize the channel */
+		rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags);
+		if (rc)
+			goto err_no_dma;
 	}
 
 	dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 0cb259c5..8c9f45f 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -289,7 +289,7 @@
 	do {
 		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
-			pr_err("%s: timeout!\n", __func__);
+			dev_err(chan->device->dev, "%s: timeout!\n", __func__);
 			return DMA_ERROR;
 		}
 		if (status != DMA_IN_PROGRESS)
@@ -482,7 +482,8 @@
 	device = chan->device;
 
 	/* check if the channel supports slave transactions */
-	if (!test_bit(DMA_SLAVE, device->cap_mask.bits))
+	if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) ||
+	      test_bit(DMA_CYCLIC, device->cap_mask.bits)))
 		return -ENXIO;
 
 	/*
@@ -518,7 +519,7 @@
 	struct dma_chan *chan;
 
 	if (mask && !__dma_device_satisfies_mask(dev, mask)) {
-		pr_debug("%s: wrong capabilities\n", __func__);
+		dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__);
 		return NULL;
 	}
 	/* devices with multiple channels need special handling as we need to
@@ -533,12 +534,12 @@
 
 	list_for_each_entry(chan, &dev->channels, device_node) {
 		if (chan->client_count) {
-			pr_debug("%s: %s busy\n",
+			dev_dbg(dev->dev, "%s: %s busy\n",
 				 __func__, dma_chan_name(chan));
 			continue;
 		}
 		if (fn && !fn(chan, fn_param)) {
-			pr_debug("%s: %s filter said false\n",
+			dev_dbg(dev->dev, "%s: %s filter said false\n",
 				 __func__, dma_chan_name(chan));
 			continue;
 		}
@@ -567,11 +568,12 @@
 
 		if (err) {
 			if (err == -ENODEV) {
-				pr_debug("%s: %s module removed\n", __func__,
-					 dma_chan_name(chan));
+				dev_dbg(device->dev, "%s: %s module removed\n",
+					__func__, dma_chan_name(chan));
 				list_del_rcu(&device->global_node);
 			} else
-				pr_debug("%s: failed to get %s: (%d)\n",
+				dev_dbg(device->dev,
+					"%s: failed to get %s: (%d)\n",
 					 __func__, dma_chan_name(chan), err);
 
 			if (--device->privatecnt == 0)
@@ -602,7 +604,8 @@
 		device->privatecnt++;
 		err = dma_chan_get(chan);
 		if (err) {
-			pr_debug("%s: failed to get %s: (%d)\n",
+			dev_dbg(chan->device->dev,
+				"%s: failed to get %s: (%d)\n",
 				__func__, dma_chan_name(chan), err);
 			chan = NULL;
 			if (--device->privatecnt == 0)
@@ -814,8 +817,9 @@
 				list_del_rcu(&device->global_node);
 				break;
 			} else if (err)
-				pr_debug("%s: failed to get %s: (%d)\n",
-				       __func__, dma_chan_name(chan), err);
+				dev_dbg(chan->device->dev,
+					"%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
 		}
 	}
 
@@ -862,12 +866,12 @@
 		return false;
 	#endif
 
-	#if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+	#if IS_ENABLED(CONFIG_ASYNC_MEMCPY)
 	if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
 		return false;
 	#endif
 
-	#if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+	#if IS_ENABLED(CONFIG_ASYNC_XOR)
 	if (!dma_has_cap(DMA_XOR, device->cap_mask))
 		return false;
 
@@ -877,7 +881,7 @@
 	#endif
 	#endif
 
-	#if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+	#if IS_ENABLED(CONFIG_ASYNC_PQ)
 	if (!dma_has_cap(DMA_PQ, device->cap_mask))
 		return false;
 
@@ -1222,8 +1226,9 @@
 
 	while (tx->cookie == -EBUSY) {
 		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
-			pr_err("%s timeout waiting for descriptor submission\n",
-			       __func__);
+			dev_err(tx->chan->device->dev,
+				"%s timeout waiting for descriptor submission\n",
+				__func__);
 			return DMA_ERROR;
 		}
 		cpu_relax();
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 97199b3..edf053f 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -45,22 +45,19 @@
 			DW_DMA_MSIZE_16;			\
 		u8 _dmsize = _is_slave ? _sconfig->dst_maxburst :	\
 			DW_DMA_MSIZE_16;			\
+		u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ?		\
+			_dwc->p_master : _dwc->m_master;		\
+		u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ?		\
+			_dwc->p_master : _dwc->m_master;		\
 								\
 		(DWC_CTLL_DST_MSIZE(_dmsize)			\
 		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
 		 | DWC_CTLL_LLP_D_EN				\
 		 | DWC_CTLL_LLP_S_EN				\
-		 | DWC_CTLL_DMS(_dwc->dst_master)		\
-		 | DWC_CTLL_SMS(_dwc->src_master));		\
+		 | DWC_CTLL_DMS(_dms)				\
+		 | DWC_CTLL_SMS(_sms));				\
 	})
 
-/*
- * Number of descriptors to allocate for each channel. This should be
- * made configurable somehow; preferably, the clients (at least the
- * ones using slave transfers) should be able to give us a hint.
- */
-#define NR_DESCS_PER_CHANNEL	64
-
 /* The set of bus widths supported by the DMA controller */
 #define DW_DMA_BUSWIDTHS			  \
 	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED)	| \
@@ -80,51 +77,65 @@
 	return to_dw_desc(dwc->active_list.next);
 }
 
-static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-	struct dw_desc *desc, *_desc;
-	struct dw_desc *ret = NULL;
-	unsigned int i = 0;
-	unsigned long flags;
+	struct dw_desc		*desc = txd_to_dw_desc(tx);
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(tx->chan);
+	dma_cookie_t		cookie;
+	unsigned long		flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
-		i++;
-		if (async_tx_test_ack(&desc->txd)) {
-			list_del(&desc->desc_node);
-			ret = desc;
-			break;
-		}
-		dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
-	}
+	cookie = dma_cookie_assign(tx);
+
+	/*
+	 * REVISIT: We should attempt to chain as many descriptors as
+	 * possible, perhaps even appending to those already submitted
+	 * for DMA. But this is hard to do in a race-free manner.
+	 */
+
+	list_add_tail(&desc->desc_node, &dwc->queue);
 	spin_unlock_irqrestore(&dwc->lock, flags);
+	dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n",
+		 __func__, desc->txd.cookie);
 
-	dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
-
-	return ret;
+	return cookie;
 }
 
-/*
- * Move a descriptor, including any children, to the free list.
- * `desc' must not be on any lists.
- */
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	struct dw_desc *desc;
+	dma_addr_t phys;
+
+	desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);
+	if (!desc)
+		return NULL;
+
+	dwc->descs_allocated++;
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->txd, &dwc->chan);
+	desc->txd.tx_submit = dwc_tx_submit;
+	desc->txd.flags = DMA_CTRL_ACK;
+	desc->txd.phys = phys;
+	return desc;
+}
+
 static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 {
-	unsigned long flags;
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	struct dw_desc *child, *_next;
 
-	if (desc) {
-		struct dw_desc *child;
+	if (unlikely(!desc))
+		return;
 
-		spin_lock_irqsave(&dwc->lock, flags);
-		list_for_each_entry(child, &desc->tx_list, desc_node)
-			dev_vdbg(chan2dev(&dwc->chan),
-					"moving child desc %p to freelist\n",
-					child);
-		list_splice_init(&desc->tx_list, &dwc->free_list);
-		dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
-		list_add(&desc->desc_node, &dwc->free_list);
-		spin_unlock_irqrestore(&dwc->lock, flags);
+	list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) {
+		list_del(&child->desc_node);
+		dma_pool_free(dw->desc_pool, child, child->txd.phys);
+		dwc->descs_allocated--;
 	}
+
+	dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
+	dwc->descs_allocated--;
 }
 
 static void dwc_initialize(struct dw_dma_chan *dwc)
@@ -133,7 +144,7 @@
 	u32 cfghi = DWC_CFGH_FIFO_MODE;
 	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
 
-	if (dwc->initialized == true)
+	if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
 		return;
 
 	cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
@@ -146,26 +157,11 @@
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
 	channel_set_bit(dw, MASK.ERROR, dwc->mask);
 
-	dwc->initialized = true;
+	set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
 }
 
 /*----------------------------------------------------------------------*/
 
-static inline unsigned int dwc_fast_ffs(unsigned long long v)
-{
-	/*
-	 * We can be a lot more clever here, but this should take care
-	 * of the most common optimization.
-	 */
-	if (!(v & 7))
-		return 3;
-	else if (!(v & 3))
-		return 2;
-	else if (!(v & 1))
-		return 1;
-	return 0;
-}
-
 static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
 {
 	dev_err(chan2dev(&dwc->chan),
@@ -197,12 +193,12 @@
 	 * Software emulation of LLP mode relies on interrupts to continue
 	 * multi block transfer.
 	 */
-	ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN;
+	ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN;
 
-	channel_writel(dwc, SAR, desc->lli.sar);
-	channel_writel(dwc, DAR, desc->lli.dar);
+	channel_writel(dwc, SAR, lli_read(desc, sar));
+	channel_writel(dwc, DAR, lli_read(desc, dar));
 	channel_writel(dwc, CTL_LO, ctllo);
-	channel_writel(dwc, CTL_HI, desc->lli.ctlhi);
+	channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi));
 	channel_set_bit(dw, CH_EN, dwc->mask);
 
 	/* Move pointer to next descriptor */
@@ -213,6 +209,7 @@
 static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 {
 	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+	u8		lms = DWC_LLP_LMS(dwc->m_master);
 	unsigned long	was_soft_llp;
 
 	/* ASSERT:  channel is idle */
@@ -237,7 +234,7 @@
 
 		dwc_initialize(dwc);
 
-		dwc->residue = first->total_len;
+		first->residue = first->total_len;
 		dwc->tx_node_active = &first->tx_list;
 
 		/* Submit first block */
@@ -248,9 +245,8 @@
 
 	dwc_initialize(dwc);
 
-	channel_writel(dwc, LLP, first->txd.phys);
-	channel_writel(dwc, CTL_LO,
-			DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+	channel_writel(dwc, LLP, first->txd.phys | lms);
+	channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
 	channel_writel(dwc, CTL_HI, 0);
 	channel_set_bit(dw, CH_EN, dwc->mask);
 }
@@ -293,11 +289,7 @@
 	list_for_each_entry(child, &desc->tx_list, desc_node)
 		async_tx_ack(&child->txd);
 	async_tx_ack(&desc->txd);
-
-	list_splice_init(&desc->tx_list, &dwc->free_list);
-	list_move(&desc->desc_node, &dwc->free_list);
-
-	dma_descriptor_unmap(txd);
+	dwc_desc_put(dwc, desc);
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	if (callback)
@@ -368,11 +360,11 @@
 
 			head = &desc->tx_list;
 			if (active != head) {
-				/* Update desc to reflect last sent one */
-				if (active != head->next)
-					desc = to_dw_desc(active->prev);
-
-				dwc->residue -= desc->len;
+				/* Update residue to reflect last sent descriptor */
+				if (active == head->next)
+					desc->residue -= desc->len;
+				else
+					desc->residue -= to_dw_desc(active->prev)->len;
 
 				child = to_dw_desc(active);
 
@@ -387,8 +379,6 @@
 			clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
 		}
 
-		dwc->residue = 0;
-
 		spin_unlock_irqrestore(&dwc->lock, flags);
 
 		dwc_complete_all(dw, dwc);
@@ -396,7 +386,6 @@
 	}
 
 	if (list_empty(&dwc->active_list)) {
-		dwc->residue = 0;
 		spin_unlock_irqrestore(&dwc->lock, flags);
 		return;
 	}
@@ -411,31 +400,31 @@
 
 	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
 		/* Initial residue value */
-		dwc->residue = desc->total_len;
+		desc->residue = desc->total_len;
 
 		/* Check first descriptors addr */
-		if (desc->txd.phys == llp) {
+		if (desc->txd.phys == DWC_LLP_LOC(llp)) {
 			spin_unlock_irqrestore(&dwc->lock, flags);
 			return;
 		}
 
 		/* Check first descriptors llp */
-		if (desc->lli.llp == llp) {
+		if (lli_read(desc, llp) == llp) {
 			/* This one is currently in progress */
-			dwc->residue -= dwc_get_sent(dwc);
+			desc->residue -= dwc_get_sent(dwc);
 			spin_unlock_irqrestore(&dwc->lock, flags);
 			return;
 		}
 
-		dwc->residue -= desc->len;
+		desc->residue -= desc->len;
 		list_for_each_entry(child, &desc->tx_list, desc_node) {
-			if (child->lli.llp == llp) {
+			if (lli_read(child, llp) == llp) {
 				/* Currently in progress */
-				dwc->residue -= dwc_get_sent(dwc);
+				desc->residue -= dwc_get_sent(dwc);
 				spin_unlock_irqrestore(&dwc->lock, flags);
 				return;
 			}
-			dwc->residue -= child->len;
+			desc->residue -= child->len;
 		}
 
 		/*
@@ -457,10 +446,14 @@
 	spin_unlock_irqrestore(&dwc->lock, flags);
 }
 
-static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
+static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc)
 {
 	dev_crit(chan2dev(&dwc->chan), "  desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
-		 lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo);
+		 lli_read(desc, sar),
+		 lli_read(desc, dar),
+		 lli_read(desc, llp),
+		 lli_read(desc, ctlhi),
+		 lli_read(desc, ctllo));
 }
 
 static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
@@ -496,9 +489,9 @@
 	 */
 	dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n"
 				       "  cookie: %d\n", bad_desc->txd.cookie);
-	dwc_dump_lli(dwc, &bad_desc->lli);
+	dwc_dump_lli(dwc, bad_desc);
 	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
-		dwc_dump_lli(dwc, &child->lli);
+		dwc_dump_lli(dwc, child);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -549,7 +542,7 @@
 	 */
 	if (unlikely(status_err & dwc->mask) ||
 			unlikely(status_xfer & dwc->mask)) {
-		int i;
+		unsigned int i;
 
 		dev_err(chan2dev(&dwc->chan),
 			"cyclic DMA unexpected %s interrupt, stopping DMA transfer\n",
@@ -571,7 +564,7 @@
 		dma_writel(dw, CLEAR.XFER, dwc->mask);
 
 		for (i = 0; i < dwc->cdesc->periods; i++)
-			dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
+			dwc_dump_lli(dwc, dwc->cdesc->desc[i]);
 
 		spin_unlock_irqrestore(&dwc->lock, flags);
 	}
@@ -589,7 +582,7 @@
 	u32 status_block;
 	u32 status_xfer;
 	u32 status_err;
-	int i;
+	unsigned int i;
 
 	status_block = dma_readl(dw, RAW.BLOCK);
 	status_xfer = dma_readl(dw, RAW.XFER);
@@ -658,30 +651,6 @@
 
 /*----------------------------------------------------------------------*/
 
-static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-	struct dw_desc		*desc = txd_to_dw_desc(tx);
-	struct dw_dma_chan	*dwc = to_dw_dma_chan(tx->chan);
-	dma_cookie_t		cookie;
-	unsigned long		flags;
-
-	spin_lock_irqsave(&dwc->lock, flags);
-	cookie = dma_cookie_assign(tx);
-
-	/*
-	 * REVISIT: We should attempt to chain as many descriptors as
-	 * possible, perhaps even appending to those already submitted
-	 * for DMA. But this is hard to do in a race-free manner.
-	 */
-
-	dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie);
-	list_add_tail(&desc->desc_node, &dwc->queue);
-
-	spin_unlock_irqrestore(&dwc->lock, flags);
-
-	return cookie;
-}
-
 static struct dma_async_tx_descriptor *
 dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		size_t len, unsigned long flags)
@@ -693,10 +662,12 @@
 	struct dw_desc		*prev;
 	size_t			xfer_count;
 	size_t			offset;
+	u8			m_master = dwc->m_master;
 	unsigned int		src_width;
 	unsigned int		dst_width;
-	unsigned int		data_width;
+	unsigned int		data_width = dw->pdata->data_width[m_master];
 	u32			ctllo;
+	u8			lms = DWC_LLP_LMS(m_master);
 
 	dev_vdbg(chan2dev(chan),
 			"%s: d%pad s%pad l0x%zx f0x%lx\n", __func__,
@@ -709,11 +680,7 @@
 
 	dwc->direction = DMA_MEM_TO_MEM;
 
-	data_width = min_t(unsigned int, dw->data_width[dwc->src_master],
-			   dw->data_width[dwc->dst_master]);
-
-	src_width = dst_width = min_t(unsigned int, data_width,
-				      dwc_fast_ffs(src | dest | len));
+	src_width = dst_width = __ffs(data_width | src | dest | len);
 
 	ctllo = DWC_DEFAULT_CTLLO(chan)
 			| DWC_CTLL_DST_WIDTH(dst_width)
@@ -731,27 +698,27 @@
 		if (!desc)
 			goto err_desc_get;
 
-		desc->lli.sar = src + offset;
-		desc->lli.dar = dest + offset;
-		desc->lli.ctllo = ctllo;
-		desc->lli.ctlhi = xfer_count;
+		lli_write(desc, sar, src + offset);
+		lli_write(desc, dar, dest + offset);
+		lli_write(desc, ctllo, ctllo);
+		lli_write(desc, ctlhi, xfer_count);
 		desc->len = xfer_count << src_width;
 
 		if (!first) {
 			first = desc;
 		} else {
-			prev->lli.llp = desc->txd.phys;
-			list_add_tail(&desc->desc_node,
-					&first->tx_list);
+			lli_write(prev, llp, desc->txd.phys | lms);
+			list_add_tail(&desc->desc_node, &first->tx_list);
 		}
 		prev = desc;
 	}
 
 	if (flags & DMA_PREP_INTERRUPT)
 		/* Trigger interrupt after last block */
-		prev->lli.ctllo |= DWC_CTLL_INT_EN;
+		lli_set(prev, ctllo, DWC_CTLL_INT_EN);
 
 	prev->lli.llp = 0;
+	lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
 	first->txd.flags = flags;
 	first->total_len = len;
 
@@ -773,10 +740,12 @@
 	struct dw_desc		*prev;
 	struct dw_desc		*first;
 	u32			ctllo;
+	u8			m_master = dwc->m_master;
+	u8			lms = DWC_LLP_LMS(m_master);
 	dma_addr_t		reg;
 	unsigned int		reg_width;
 	unsigned int		mem_width;
-	unsigned int		data_width;
+	unsigned int		data_width = dw->pdata->data_width[m_master];
 	unsigned int		i;
 	struct scatterlist	*sg;
 	size_t			total_len = 0;
@@ -802,8 +771,6 @@
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
 			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
 
-		data_width = dw->data_width[dwc->src_master];
-
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
@@ -811,17 +778,16 @@
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			mem_width = min_t(unsigned int,
-					  data_width, dwc_fast_ffs(mem | len));
+			mem_width = __ffs(data_width | mem | len);
 
 slave_sg_todev_fill_desc:
 			desc = dwc_desc_get(dwc);
 			if (!desc)
 				goto err_desc_get;
 
-			desc->lli.sar = mem;
-			desc->lli.dar = reg;
-			desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
+			lli_write(desc, sar, mem);
+			lli_write(desc, dar, reg);
+			lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
 			if ((len >> mem_width) > dwc->block_size) {
 				dlen = dwc->block_size << mem_width;
 				mem += dlen;
@@ -831,15 +797,14 @@
 				len = 0;
 			}
 
-			desc->lli.ctlhi = dlen >> mem_width;
+			lli_write(desc, ctlhi, dlen >> mem_width);
 			desc->len = dlen;
 
 			if (!first) {
 				first = desc;
 			} else {
-				prev->lli.llp = desc->txd.phys;
-				list_add_tail(&desc->desc_node,
-						&first->tx_list);
+				lli_write(prev, llp, desc->txd.phys | lms);
+				list_add_tail(&desc->desc_node, &first->tx_list);
 			}
 			prev = desc;
 			total_len += dlen;
@@ -859,8 +824,6 @@
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
 			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
 
-		data_width = dw->data_width[dwc->dst_master];
-
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
@@ -868,17 +831,16 @@
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			mem_width = min_t(unsigned int,
-					  data_width, dwc_fast_ffs(mem | len));
+			mem_width = __ffs(data_width | mem | len);
 
 slave_sg_fromdev_fill_desc:
 			desc = dwc_desc_get(dwc);
 			if (!desc)
 				goto err_desc_get;
 
-			desc->lli.sar = reg;
-			desc->lli.dar = mem;
-			desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
+			lli_write(desc, sar, reg);
+			lli_write(desc, dar, mem);
+			lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
 			if ((len >> reg_width) > dwc->block_size) {
 				dlen = dwc->block_size << reg_width;
 				mem += dlen;
@@ -887,15 +849,14 @@
 				dlen = len;
 				len = 0;
 			}
-			desc->lli.ctlhi = dlen >> reg_width;
+			lli_write(desc, ctlhi, dlen >> reg_width);
 			desc->len = dlen;
 
 			if (!first) {
 				first = desc;
 			} else {
-				prev->lli.llp = desc->txd.phys;
-				list_add_tail(&desc->desc_node,
-						&first->tx_list);
+				lli_write(prev, llp, desc->txd.phys | lms);
+				list_add_tail(&desc->desc_node, &first->tx_list);
 			}
 			prev = desc;
 			total_len += dlen;
@@ -910,9 +871,10 @@
 
 	if (flags & DMA_PREP_INTERRUPT)
 		/* Trigger interrupt after last block */
-		prev->lli.ctllo |= DWC_CTLL_INT_EN;
+		lli_set(prev, ctllo, DWC_CTLL_INT_EN);
 
 	prev->lli.llp = 0;
+	lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
 	first->total_len = total_len;
 
 	return &first->txd;
@@ -937,8 +899,8 @@
 	dwc->src_id = dws->src_id;
 	dwc->dst_id = dws->dst_id;
 
-	dwc->src_master = dws->src_master;
-	dwc->dst_master = dws->dst_master;
+	dwc->m_master = dws->m_master;
+	dwc->p_master = dws->p_master;
 
 	return true;
 }
@@ -991,7 +953,7 @@
 	while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
 		udelay(2);
 
-	dwc->paused = true;
+	set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -1004,7 +966,7 @@
 
 	channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
 
-	dwc->paused = false;
+	clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
 }
 
 static int dwc_resume(struct dma_chan *chan)
@@ -1012,12 +974,10 @@
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	unsigned long		flags;
 
-	if (!dwc->paused)
-		return 0;
-
 	spin_lock_irqsave(&dwc->lock, flags);
 
-	dwc_chan_resume(dwc);
+	if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
+		dwc_chan_resume(dwc);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -1053,16 +1013,37 @@
 	return 0;
 }
 
-static inline u32 dwc_get_residue(struct dw_dma_chan *dwc)
+static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c)
 {
+	struct dw_desc *desc;
+
+	list_for_each_entry(desc, &dwc->active_list, desc_node)
+		if (desc->txd.cookie == c)
+			return desc;
+
+	return NULL;
+}
+
+static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie)
+{
+	struct dw_desc *desc;
 	unsigned long flags;
 	u32 residue;
 
 	spin_lock_irqsave(&dwc->lock, flags);
 
-	residue = dwc->residue;
-	if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
-		residue -= dwc_get_sent(dwc);
+	desc = dwc_find_desc(dwc, cookie);
+	if (desc) {
+		if (desc == dwc_first_active(dwc)) {
+			residue = desc->residue;
+			if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
+				residue -= dwc_get_sent(dwc);
+		} else {
+			residue = desc->total_len;
+		}
+	} else {
+		residue = 0;
+	}
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 	return residue;
@@ -1083,10 +1064,12 @@
 	dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_COMPLETE)
-		dma_set_residue(txstate, dwc_get_residue(dwc));
+	if (ret == DMA_COMPLETE)
+		return ret;
 
-	if (dwc->paused && ret == DMA_IN_PROGRESS)
+	dma_set_residue(txstate, dwc_get_residue(dwc, cookie));
+
+	if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS)
 		return DMA_PAUSED;
 
 	return ret;
@@ -1107,7 +1090,7 @@
 
 static void dw_dma_off(struct dw_dma *dw)
 {
-	int i;
+	unsigned int i;
 
 	dma_writel(dw, CFG, 0);
 
@@ -1121,7 +1104,7 @@
 		cpu_relax();
 
 	for (i = 0; i < dw->dma.chancnt; i++)
-		dw->chan[i].initialized = false;
+		clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
 }
 
 static void dw_dma_on(struct dw_dma *dw)
@@ -1133,9 +1116,6 @@
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(chan->device);
-	struct dw_desc		*desc;
-	int			i;
-	unsigned long		flags;
 
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
 
@@ -1166,48 +1146,13 @@
 		dw_dma_on(dw);
 	dw->in_use |= dwc->mask;
 
-	spin_lock_irqsave(&dwc->lock, flags);
-	i = dwc->descs_allocated;
-	while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
-		dma_addr_t phys;
-
-		spin_unlock_irqrestore(&dwc->lock, flags);
-
-		desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys);
-		if (!desc)
-			goto err_desc_alloc;
-
-		memset(desc, 0, sizeof(struct dw_desc));
-
-		INIT_LIST_HEAD(&desc->tx_list);
-		dma_async_tx_descriptor_init(&desc->txd, chan);
-		desc->txd.tx_submit = dwc_tx_submit;
-		desc->txd.flags = DMA_CTRL_ACK;
-		desc->txd.phys = phys;
-
-		dwc_desc_put(dwc, desc);
-
-		spin_lock_irqsave(&dwc->lock, flags);
-		i = ++dwc->descs_allocated;
-	}
-
-	spin_unlock_irqrestore(&dwc->lock, flags);
-
-	dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
-
-	return i;
-
-err_desc_alloc:
-	dev_info(chan2dev(chan), "only allocated %d descriptors\n", i);
-
-	return i;
+	return 0;
 }
 
 static void dwc_free_chan_resources(struct dma_chan *chan)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(chan->device);
-	struct dw_desc		*desc, *_desc;
 	unsigned long		flags;
 	LIST_HEAD(list);
 
@@ -1220,17 +1165,15 @@
 	BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	list_splice_init(&dwc->free_list, &list);
-	dwc->descs_allocated = 0;
 
 	/* Clear custom channel configuration */
 	dwc->src_id = 0;
 	dwc->dst_id = 0;
 
-	dwc->src_master = 0;
-	dwc->dst_master = 0;
+	dwc->m_master = 0;
+	dwc->p_master = 0;
 
-	dwc->initialized = false;
+	clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
 
 	/* Disable interrupts */
 	channel_clear_bit(dw, MASK.XFER, dwc->mask);
@@ -1244,11 +1187,6 @@
 	if (!dw->in_use)
 		dw_dma_off(dw);
 
-	list_for_each_entry_safe(desc, _desc, &list, desc_node) {
-		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
-		dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
-	}
-
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
@@ -1326,6 +1264,7 @@
 	struct dw_cyclic_desc		*retval = NULL;
 	struct dw_desc			*desc;
 	struct dw_desc			*last = NULL;
+	u8				lms = DWC_LLP_LMS(dwc->m_master);
 	unsigned long			was_cyclic;
 	unsigned int			reg_width;
 	unsigned int			periods;
@@ -1379,9 +1318,6 @@
 
 	retval = ERR_PTR(-ENOMEM);
 
-	if (periods > NR_DESCS_PER_CHANNEL)
-		goto out_err;
-
 	cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
 	if (!cdesc)
 		goto out_err;
@@ -1397,50 +1333,50 @@
 
 		switch (direction) {
 		case DMA_MEM_TO_DEV:
-			desc->lli.dar = sconfig->dst_addr;
-			desc->lli.sar = buf_addr + (period_len * i);
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
-					| DWC_CTLL_DST_WIDTH(reg_width)
-					| DWC_CTLL_SRC_WIDTH(reg_width)
-					| DWC_CTLL_DST_FIX
-					| DWC_CTLL_SRC_INC
-					| DWC_CTLL_INT_EN);
+			lli_write(desc, dar, sconfig->dst_addr);
+			lli_write(desc, sar, buf_addr + period_len * i);
+			lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
+				| DWC_CTLL_DST_WIDTH(reg_width)
+				| DWC_CTLL_SRC_WIDTH(reg_width)
+				| DWC_CTLL_DST_FIX
+				| DWC_CTLL_SRC_INC
+				| DWC_CTLL_INT_EN));
 
-			desc->lli.ctllo |= sconfig->device_fc ?
-				DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
-				DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+			lli_set(desc, ctllo, sconfig->device_fc ?
+					DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+					DWC_CTLL_FC(DW_DMA_FC_D_M2P));
 
 			break;
 		case DMA_DEV_TO_MEM:
-			desc->lli.dar = buf_addr + (period_len * i);
-			desc->lli.sar = sconfig->src_addr;
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
-					| DWC_CTLL_SRC_WIDTH(reg_width)
-					| DWC_CTLL_DST_WIDTH(reg_width)
-					| DWC_CTLL_DST_INC
-					| DWC_CTLL_SRC_FIX
-					| DWC_CTLL_INT_EN);
+			lli_write(desc, dar, buf_addr + period_len * i);
+			lli_write(desc, sar, sconfig->src_addr);
+			lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
+				| DWC_CTLL_SRC_WIDTH(reg_width)
+				| DWC_CTLL_DST_WIDTH(reg_width)
+				| DWC_CTLL_DST_INC
+				| DWC_CTLL_SRC_FIX
+				| DWC_CTLL_INT_EN));
 
-			desc->lli.ctllo |= sconfig->device_fc ?
-				DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
-				DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+			lli_set(desc, ctllo, sconfig->device_fc ?
+					DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+					DWC_CTLL_FC(DW_DMA_FC_D_P2M));
 
 			break;
 		default:
 			break;
 		}
 
-		desc->lli.ctlhi = (period_len >> reg_width);
+		lli_write(desc, ctlhi, period_len >> reg_width);
 		cdesc->desc[i] = desc;
 
 		if (last)
-			last->lli.llp = desc->txd.phys;
+			lli_write(last, llp, desc->txd.phys | lms);
 
 		last = desc;
 	}
 
 	/* Let's make a cyclic list */
-	last->lli.llp = cdesc->desc[0]->txd.phys;
+	lli_write(last, llp, cdesc->desc[0]->txd.phys | lms);
 
 	dev_dbg(chan2dev(&dwc->chan),
 			"cyclic prepared buf %pad len %zu period %zu periods %d\n",
@@ -1471,7 +1407,7 @@
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
 	struct dw_cyclic_desc	*cdesc = dwc->cdesc;
-	int			i;
+	unsigned int		i;
 	unsigned long		flags;
 
 	dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__);
@@ -1495,32 +1431,38 @@
 	kfree(cdesc->desc);
 	kfree(cdesc);
 
+	dwc->cdesc = NULL;
+
 	clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
 }
 EXPORT_SYMBOL(dw_dma_cyclic_free);
 
 /*----------------------------------------------------------------------*/
 
-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
+int dw_dma_probe(struct dw_dma_chip *chip)
 {
+	struct dw_dma_platform_data *pdata;
 	struct dw_dma		*dw;
 	bool			autocfg = false;
 	unsigned int		dw_params;
-	unsigned int		max_blk_size = 0;
+	unsigned int		i;
 	int			err;
-	int			i;
 
 	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
 	if (!dw)
 		return -ENOMEM;
 
+	dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
+	if (!dw->pdata)
+		return -ENOMEM;
+
 	dw->regs = chip->regs;
 	chip->dw = dw;
 
 	pm_runtime_get_sync(chip->dev);
 
-	if (!pdata) {
-		dw_params = dma_read_byaddr(chip->regs, DW_PARAMS);
+	if (!chip->pdata) {
+		dw_params = dma_readl(dw, DW_PARAMS);
 		dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params);
 
 		autocfg = dw_params >> DW_PARAMS_EN & 1;
@@ -1529,29 +1471,31 @@
 			goto err_pdata;
 		}
 
-		pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL);
-		if (!pdata) {
-			err = -ENOMEM;
-			goto err_pdata;
-		}
+		/* Work on the driver-owned copy of the platform data */
+		pdata = dw->pdata;
 
 		/* Get hardware configuration parameters */
 		pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
 		pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
 		for (i = 0; i < pdata->nr_masters; i++) {
 			pdata->data_width[i] =
-				(dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2;
+				4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3);
 		}
-		max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
+		pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
 
 		/* Fill platform data with the default values */
 		pdata->is_private = true;
 		pdata->is_memcpy = true;
 		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
 		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
-	} else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
+	} else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
 		err = -EINVAL;
 		goto err_pdata;
+	} else {
+		memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata));
+
+		/* From here on use the driver-owned copy, not chip->pdata */
+		pdata = dw->pdata;
 	}
 
 	dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan),
@@ -1561,11 +1505,6 @@
 		goto err_pdata;
 	}
 
-	/* Get hardware configuration parameters */
-	dw->nr_masters = pdata->nr_masters;
-	for (i = 0; i < dw->nr_masters; i++)
-		dw->data_width[i] = pdata->data_width[i];
-
 	/* Calculate all channel mask before DMA setup */
 	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
 
@@ -1612,7 +1551,6 @@
 
 		INIT_LIST_HEAD(&dwc->active_list);
 		INIT_LIST_HEAD(&dwc->queue);
-		INIT_LIST_HEAD(&dwc->free_list);
 
 		channel_clear_bit(dw, CH_EN, dwc->mask);
 
@@ -1620,11 +1558,9 @@
 
 		/* Hardware configuration */
 		if (autocfg) {
-			unsigned int dwc_params;
 			unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
-			void __iomem *addr = chip->regs + r * sizeof(u32);
-
-			dwc_params = dma_read_byaddr(addr, DWC_PARAMS);
+			void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
+			unsigned int dwc_params = dma_readl_native(addr);
 
 			dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
 					   dwc_params);
@@ -1635,16 +1571,15 @@
 			 * up to 0x0a for 4095.
 			 */
 			dwc->block_size =
-				(4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
+				(4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
 			dwc->nollp =
 				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
 		} else {
 			dwc->block_size = pdata->block_size;
 
 			/* Check if channel supports multi block transfer */
-			channel_writel(dwc, LLP, 0xfffffffc);
-			dwc->nollp =
-				(channel_readl(dwc, LLP) & 0xfffffffc) == 0;
+			channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff));
+			dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0;
 			channel_writel(dwc, LLP, 0);
 		}
 	}
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 358f968..0ae6c3b 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -17,8 +17,8 @@
 
 static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 {
+	const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
 	struct dw_dma_chip *chip;
-	struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
 	int ret;
 
 	ret = pcim_enable_device(pdev);
@@ -49,8 +49,9 @@
 	chip->dev = &pdev->dev;
 	chip->regs = pcim_iomap_table(pdev)[0];
 	chip->irq = pdev->irq;
+	chip->pdata = pdata;
 
-	ret = dw_dma_probe(chip, pdata);
+	ret = dw_dma_probe(chip);
 	if (ret)
 		return ret;
 
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 26edbe3..5bda0eb 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -42,13 +42,13 @@
 
 	slave.src_id = dma_spec->args[0];
 	slave.dst_id = dma_spec->args[0];
-	slave.src_master = dma_spec->args[1];
-	slave.dst_master = dma_spec->args[2];
+	slave.m_master = dma_spec->args[1];
+	slave.p_master = dma_spec->args[2];
 
 	if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS ||
 		    slave.dst_id >= DW_DMA_MAX_NR_REQUESTS ||
-		    slave.src_master >= dw->nr_masters ||
-		    slave.dst_master >= dw->nr_masters))
+		    slave.m_master >= dw->pdata->nr_masters ||
+		    slave.p_master >= dw->pdata->nr_masters))
 		return NULL;
 
 	dma_cap_zero(cap);
@@ -66,8 +66,8 @@
 		.dma_dev = dma_spec->dev,
 		.src_id = dma_spec->slave_id,
 		.dst_id = dma_spec->slave_id,
-		.src_master = 1,
-		.dst_master = 0,
+		.m_master = 0,
+		.p_master = 1,
 	};
 
 	return dw_dma_filter(chan, &slave);
@@ -103,6 +103,7 @@
 	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
 	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
+	u32 nr_masters;
 	u32 nr_channels;
 
 	if (!np) {
@@ -110,6 +111,11 @@
 		return NULL;
 	}
 
+	if (of_property_read_u32(np, "dma-masters", &nr_masters))
+		return NULL;
+	if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
+		return NULL;
+
 	if (of_property_read_u32(np, "dma-channels", &nr_channels))
 		return NULL;
 
@@ -117,6 +123,7 @@
 	if (!pdata)
 		return NULL;
 
+	pdata->nr_masters = nr_masters;
 	pdata->nr_channels = nr_channels;
 
 	if (of_property_read_bool(np, "is_private"))
@@ -131,17 +138,13 @@
 	if (!of_property_read_u32(np, "block_size", &tmp))
 		pdata->block_size = tmp;
 
-	if (!of_property_read_u32(np, "dma-masters", &tmp)) {
-		if (tmp > DW_DMA_MAX_NR_MASTERS)
-			return NULL;
-
-		pdata->nr_masters = tmp;
-	}
-
-	if (!of_property_read_u32_array(np, "data_width", arr,
-				pdata->nr_masters))
-		for (tmp = 0; tmp < pdata->nr_masters; tmp++)
+	if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
+		for (tmp = 0; tmp < nr_masters; tmp++)
 			pdata->data_width[tmp] = arr[tmp];
+	} else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
+		for (tmp = 0; tmp < nr_masters; tmp++)
+			pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
+	}
 
 	return pdata;
 }
@@ -158,7 +161,7 @@
 	struct dw_dma_chip *chip;
 	struct device *dev = &pdev->dev;
 	struct resource *mem;
-	struct dw_dma_platform_data *pdata;
+	const struct dw_dma_platform_data *pdata;
 	int err;
 
 	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
@@ -183,6 +186,7 @@
 		pdata = dw_dma_parse_dt(pdev);
 
 	chip->dev = dev;
+	chip->pdata = pdata;
 
 	chip->clk = devm_clk_get(chip->dev, "hclk");
 	if (IS_ERR(chip->clk))
@@ -193,7 +197,7 @@
 
 	pm_runtime_enable(&pdev->dev);
 
-	err = dw_dma_probe(chip, pdata);
+	err = dw_dma_probe(chip);
 	if (err)
 		goto err_dw_dma_probe;
 
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 0a50c18..4b7bd78 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -114,10 +114,6 @@
 #define dma_writel_native writel
 #endif
 
-/* To access the registers in early stage of probe */
-#define dma_read_byaddr(addr, name) \
-	dma_readl_native((addr) + offsetof(struct dw_dma_regs, name))
-
 /* Bitfields in DW_PARAMS */
 #define DW_PARAMS_NR_CHAN	8		/* number of channels */
 #define DW_PARAMS_NR_MASTER	11		/* number of AHB masters */
@@ -143,6 +139,10 @@
 	DW_DMA_MSIZE_256,
 };
 
+/* Bitfields in LLP */
+#define DWC_LLP_LMS(x)		((x) & 3)	/* list master select */
+#define DWC_LLP_LOC(x)		((x) & ~3)	/* next lli */
+
 /* Bitfields in CTL_LO */
 #define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
 #define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */
@@ -216,6 +216,8 @@
 enum dw_dmac_flags {
 	DW_DMA_IS_CYCLIC = 0,
 	DW_DMA_IS_SOFT_LLP = 1,
+	DW_DMA_IS_PAUSED = 2,
+	DW_DMA_IS_INITIALIZED = 3,
 };
 
 struct dw_dma_chan {
@@ -224,8 +226,6 @@
 	u8				mask;
 	u8				priority;
 	enum dma_transfer_direction	direction;
-	bool				paused;
-	bool				initialized;
 
 	/* software emulation of the LLP transfers */
 	struct list_head	*tx_node_active;
@@ -236,8 +236,6 @@
 	unsigned long		flags;
 	struct list_head	active_list;
 	struct list_head	queue;
-	struct list_head	free_list;
-	u32			residue;
 	struct dw_cyclic_desc	*cdesc;
 
 	unsigned int		descs_allocated;
@@ -249,8 +247,8 @@
 	/* custom slave configuration */
 	u8			src_id;
 	u8			dst_id;
-	u8			src_master;
-	u8			dst_master;
+	u8			m_master;
+	u8			p_master;
 
 	/* configuration passed via .device_config */
 	struct dma_slave_config dma_sconfig;
@@ -283,9 +281,8 @@
 	u8			all_chan_mask;
 	u8			in_use;
 
-	/* hardware configuration */
-	unsigned char		nr_masters;
-	unsigned char		data_width[DW_DMA_MAX_NR_MASTERS];
+	/* platform data */
+	struct dw_dma_platform_data	*pdata;
 };
 
 static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
@@ -308,32 +305,51 @@
 	return container_of(ddev, struct dw_dma, dma);
 }
 
+#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
+typedef __be32 __dw32;
+#else
+typedef __le32 __dw32;
+#endif
+
 /* LLI == Linked List Item; a.k.a. DMA block descriptor */
 struct dw_lli {
 	/* values that are not changed by hardware */
-	u32		sar;
-	u32		dar;
-	u32		llp;		/* chain to next lli */
-	u32		ctllo;
+	__dw32		sar;
+	__dw32		dar;
+	__dw32		llp;		/* chain to next lli */
+	__dw32		ctllo;
 	/* values that may get written back: */
-	u32		ctlhi;
+	__dw32		ctlhi;
 	/* sstat and dstat can snapshot peripheral register state.
 	 * silicon config may discard either or both...
 	 */
-	u32		sstat;
-	u32		dstat;
+	__dw32		sstat;
+	__dw32		dstat;
 };
 
 struct dw_desc {
 	/* FIRST values the hardware uses */
 	struct dw_lli			lli;
 
+#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
+#define lli_set(d, reg, v)		((d)->lli.reg |= cpu_to_be32(v))
+#define lli_clear(d, reg, v)		((d)->lli.reg &= ~cpu_to_be32(v))
+#define lli_read(d, reg)		be32_to_cpu((d)->lli.reg)
+#define lli_write(d, reg, v)		((d)->lli.reg = cpu_to_be32(v))
+#else
+#define lli_set(d, reg, v)		((d)->lli.reg |= cpu_to_le32(v))
+#define lli_clear(d, reg, v)		((d)->lli.reg &= ~cpu_to_le32(v))
+#define lli_read(d, reg)		le32_to_cpu((d)->lli.reg)
+#define lli_write(d, reg, v)		((d)->lli.reg = cpu_to_le32(v))
+#endif
+
 	/* THEN values for driver housekeeping */
 	struct list_head		desc_node;
 	struct list_head		tx_list;
 	struct dma_async_tx_descriptor	txd;
 	size_t				len;
 	size_t				total_len;
+	u32				residue;
 };
 
 #define to_dw_desc(h)	list_entry(h, struct dw_desc, desc_node)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 04070ba..8181ed1 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1537,8 +1537,17 @@
 
 	dev_vdbg(ecc->dev, "dma_ccerr_handler\n");
 
-	if (!edma_error_pending(ecc))
+	if (!edma_error_pending(ecc)) {
+		/*
+		 * The registers indicate no pending error event but the irq
+		 * handler has been called.
+		 * Ask eDMA to re-evaluate the error registers.
+		 */
+		dev_err(ecc->dev, "%s: Error interrupt without error event!\n",
+			__func__);
+		edma_write(ecc, EDMA_EEVAL, 1);
 		return IRQ_NONE;
+	}
 
 	while (1) {
 		/* Event missed register(s) */
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index aac85c3..a8828ed 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -462,13 +462,12 @@
 	struct fsl_desc_sw *desc;
 	dma_addr_t pdesc;
 
-	desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
 	if (!desc) {
 		chan_dbg(chan, "out of memory for link descriptor\n");
 		return NULL;
 	}
 
-	memset(desc, 0, sizeof(*desc));
 	INIT_LIST_HEAD(&desc->tx_list);
 	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
 	desc->async_tx.tx_submit = fsl_dma_tx_submit;
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index ee51051..f8c5cd5 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -77,8 +77,8 @@
 	hsu_chan_writel(hsuc, HSU_CH_MTSR, mtsr);
 
 	/* Set descriptors */
-	count = (desc->nents - desc->active) % HSU_DMA_CHAN_NR_DESC;
-	for (i = 0; i < count; i++) {
+	count = desc->nents - desc->active;
+	for (i = 0; i < count && i < HSU_DMA_CHAN_NR_DESC; i++) {
 		hsu_chan_writel(hsuc, HSU_CH_DxSAR(i), desc->sg[i].addr);
 		hsu_chan_writel(hsuc, HSU_CH_DxTSR(i), desc->sg[i].len);
 
@@ -160,7 +160,7 @@
 		return IRQ_NONE;
 
 	/* Timeout IRQ, need wait some time, see Errata 2 */
-	if (hsuc->direction == DMA_DEV_TO_MEM && (sr & HSU_CH_SR_DESCTO_ANY))
+	if (sr & HSU_CH_SR_DESCTO_ANY)
 		udelay(2);
 
 	sr &= ~HSU_CH_SR_DESCTO_ANY;
@@ -420,6 +420,8 @@
 
 	hsu->dma.dev = chip->dev;
 
+	dma_set_max_seg_size(hsu->dma.dev, HSU_CH_DxTSR_MASK);
+
 	ret = dma_async_device_register(&hsu->dma);
 	if (ret)
 		return ret;
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
index 6b070c2..486b023b 100644
--- a/drivers/dma/hsu/hsu.h
+++ b/drivers/dma/hsu/hsu.h
@@ -58,6 +58,10 @@
 #define HSU_CH_DCR_CHEI		BIT(23)
 #define HSU_CH_DCR_CHTOI(x)	BIT(24 + (x))
 
+/* Bits in HSU_CH_DxTSR */
+#define HSU_CH_DxTSR_MASK	GENMASK(15, 0)
+#define HSU_CH_DxTSR_TSR(x)	((x) & HSU_CH_DxTSR_MASK)
+
 struct hsu_dma_sg {
 	dma_addr_t addr;
 	unsigned int len;
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index efdee1a..d406056 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -690,12 +690,11 @@
 	/* allocate a completion writeback area */
 	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
 	ioat_chan->completion =
-		dma_pool_alloc(ioat_chan->ioat_dma->completion_pool,
-			       GFP_KERNEL, &ioat_chan->completion_dma);
+		dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool,
+				GFP_KERNEL, &ioat_chan->completion_dma);
 	if (!ioat_chan->completion)
 		return -ENOMEM;
 
-	memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion));
 	writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF,
 	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
 	writel(((u64)ioat_chan->completion_dma) >> 32,
@@ -1074,6 +1073,7 @@
 	struct ioatdma_chan *ioat_chan;
 	bool is_raid_device = false;
 	int err;
+	u16 val16;
 
 	dma = &ioat_dma->dma_dev;
 	dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock;
@@ -1173,6 +1173,17 @@
 	if (dca)
 		ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base);
 
+	/* disable relaxed ordering */
+	err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16);
+	if (err)
+		return err;
+
+	/* clear relaxed ordering enable */
+	val16 &= ~IOAT_DEVCTRL_ROE;
+	err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16);
+	if (err)
+		return err;
+
 	return 0;
 }
 
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index 4994a36..7053498 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -26,6 +26,13 @@
 #define IOAT_PCI_CHANERR_INT_OFFSET		0x180
 #define IOAT_PCI_CHANERRMASK_INT_OFFSET		0x184
 
+/* PCIe config registers */
+
+/* Device Control: offset N into the PCIe capability (EXPCAPID + N) */
+#define IOAT_DEVCTRL_OFFSET			0x8
+/* relaxed ordering enable */
+#define IOAT_DEVCTRL_ROE			0x10
+
 /* MMIO Device Registers */
 #define IOAT_CHANCNT_OFFSET			0x00	/*  8-bit */
 
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index e39457f..56f1fd6 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -364,13 +364,12 @@
 	struct mmp_pdma_desc_sw *desc;
 	dma_addr_t pdesc;
 
-	desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
 	if (!desc) {
 		dev_err(chan->dev, "out of memory for link descriptor\n");
 		return NULL;
 	}
 
-	memset(desc, 0, sizeof(*desc));
 	INIT_LIST_HEAD(&desc->tx_list);
 	dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
 	/* each desc has submit */
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index aae76fb..ccadafa 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -3,6 +3,7 @@
  * Copyright (C) Semihalf 2009
  * Copyright (C) Ilya Yanok, Emcraft Systems 2010
  * Copyright (C) Alexander Popov, Promcontroller 2014
+ * Copyright (C) Mario Six, Guntermann & Drunck GmbH, 2016
  *
  * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
  * (defines, structures and comments) was taken from MPC5121 DMA driver
@@ -26,18 +27,19 @@
  */
 
 /*
- * MPC512x and MPC8308 DMA driver. It supports
- * memory to memory data transfers (tested using dmatest module) and
- * data transfers between memory and peripheral I/O memory
- * by means of slave scatter/gather with these limitations:
- *  - chunked transfers (described by s/g lists with more than one item)
- *     are refused as long as proper support for scatter/gather is missing;
- *  - transfers on MPC8308 always start from software as this SoC appears
- *     not to have external request lines for peripheral flow control;
- *  - only peripheral devices with 4-byte FIFO access register are supported;
- *  - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently
- *     source and destination addresses must be 4-byte aligned
- *     and transfer size must be aligned on (4 * maxburst) boundary;
+ * MPC512x and MPC8308 DMA driver. It supports memory to memory data transfers
+ * (tested using dmatest module) and data transfers between memory and
+ * peripheral I/O memory by means of slave scatter/gather with these
+ * limitations:
+ *  - chunked transfers (described by s/g lists with more than one item) are
+ *     refused as long as proper support for scatter/gather is missing
+ *  - transfers on MPC8308 always start from software as this SoC does not have
+ *     external request lines for peripheral flow control
+ *  - memory <-> I/O memory transfer chunks of sizes of 1, 2, 4, 16 (for
+ *     MPC512x), and 32 bytes are supported, and, consequently, source
+ *     addresses and destination addresses must be aligned accordingly;
+ *     furthermore, for MPC512x SoCs, the transfer size must be aligned on
+ *     (chunk size * maxburst)
  */
 
 #include <linux/module.h>
@@ -213,8 +215,10 @@
 	/* Settings for access to peripheral FIFO */
 	dma_addr_t			src_per_paddr;
 	u32				src_tcd_nunits;
+	u8				swidth;
 	dma_addr_t			dst_per_paddr;
 	u32				dst_tcd_nunits;
+	u8				dwidth;
 
 	/* Lock for this structure */
 	spinlock_t			lock;
@@ -247,6 +251,7 @@
 static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c)
 {
 	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c);
+
 	return container_of(mchan, struct mpc_dma, channels[c->chan_id]);
 }
 
@@ -254,9 +259,9 @@
  * Execute all queued DMA descriptors.
  *
  * Following requirements must be met while calling mpc_dma_execute():
- * 	a) mchan->lock is acquired,
- * 	b) mchan->active list is empty,
- * 	c) mchan->queued list contains at least one entry.
+ *	a) mchan->lock is acquired,
+ *	b) mchan->active list is empty,
+ *	c) mchan->queued list contains at least one entry.
  */
 static void mpc_dma_execute(struct mpc_dma_chan *mchan)
 {
@@ -446,20 +451,15 @@
 		if (es & MPC_DMA_DMAES_SAE)
 			dev_err(mdma->dma.dev, "- Source Address Error\n");
 		if (es & MPC_DMA_DMAES_SOE)
-			dev_err(mdma->dma.dev, "- Source Offset"
-						" Configuration Error\n");
+			dev_err(mdma->dma.dev, "- Source Offset Configuration Error\n");
 		if (es & MPC_DMA_DMAES_DAE)
-			dev_err(mdma->dma.dev, "- Destination Address"
-								" Error\n");
+			dev_err(mdma->dma.dev, "- Destination Address Error\n");
 		if (es & MPC_DMA_DMAES_DOE)
-			dev_err(mdma->dma.dev, "- Destination Offset"
-						" Configuration Error\n");
+			dev_err(mdma->dma.dev, "- Destination Offset Configuration Error\n");
 		if (es & MPC_DMA_DMAES_NCE)
-			dev_err(mdma->dma.dev, "- NBytes/Citter"
-						" Configuration Error\n");
+			dev_err(mdma->dma.dev, "- NBytes/Citter Configuration Error\n");
 		if (es & MPC_DMA_DMAES_SGE)
-			dev_err(mdma->dma.dev, "- Scatter/Gather"
-						" Configuration Error\n");
+			dev_err(mdma->dma.dev, "- Scatter/Gather Configuration Error\n");
 		if (es & MPC_DMA_DMAES_SBE)
 			dev_err(mdma->dma.dev, "- Source Bus Error\n");
 		if (es & MPC_DMA_DMAES_DBE)
@@ -518,8 +518,8 @@
 	for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) {
 		mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL);
 		if (!mdesc) {
-			dev_notice(mdma->dma.dev, "Memory allocation error. "
-					"Allocated only %u descriptors\n", i);
+			dev_notice(mdma->dma.dev,
+				"Memory allocation error. Allocated only %u descriptors\n", i);
 			break;
 		}
 
@@ -684,6 +684,15 @@
 	return &mdesc->desc;
 }
 
+inline u8 buswidth_to_dmatsize(u8 buswidth)
+{
+	u8 res;
+
+	for (res = 0; buswidth > 1; buswidth /= 2)
+		res++;
+	return res;
+}
+
 static struct dma_async_tx_descriptor *
 mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
@@ -742,39 +751,54 @@
 
 		memset(tcd, 0, sizeof(struct mpc_dma_tcd));
 
-		if (!IS_ALIGNED(sg_dma_address(sg), 4))
-			goto err_prep;
-
 		if (direction == DMA_DEV_TO_MEM) {
 			tcd->saddr = per_paddr;
 			tcd->daddr = sg_dma_address(sg);
+
+			if (!IS_ALIGNED(sg_dma_address(sg), mchan->dwidth))
+				goto err_prep;
+
 			tcd->soff = 0;
-			tcd->doff = 4;
+			tcd->doff = mchan->dwidth;
 		} else {
 			tcd->saddr = sg_dma_address(sg);
 			tcd->daddr = per_paddr;
-			tcd->soff = 4;
+
+			if (!IS_ALIGNED(sg_dma_address(sg), mchan->swidth))
+				goto err_prep;
+
+			tcd->soff = mchan->swidth;
 			tcd->doff = 0;
 		}
 
-		tcd->ssize = MPC_DMA_TSIZE_4;
-		tcd->dsize = MPC_DMA_TSIZE_4;
+		tcd->ssize = buswidth_to_dmatsize(mchan->swidth);
+		tcd->dsize = buswidth_to_dmatsize(mchan->dwidth);
 
-		len = sg_dma_len(sg);
-		tcd->nbytes = tcd_nunits * 4;
-		if (!IS_ALIGNED(len, tcd->nbytes))
-			goto err_prep;
+		if (mdma->is_mpc8308) {
+			tcd->nbytes = sg_dma_len(sg);
+			if (!IS_ALIGNED(tcd->nbytes, mchan->swidth))
+				goto err_prep;
 
-		iter = len / tcd->nbytes;
-		if (iter >= 1 << 15) {
-			/* len is too big */
-			goto err_prep;
+			/* No major loops for MPC8308 */
+			tcd->biter = 1;
+			tcd->citer = 1;
+		} else {
+			len = sg_dma_len(sg);
+			tcd->nbytes = tcd_nunits * tcd->ssize;
+			if (!IS_ALIGNED(len, tcd->nbytes))
+				goto err_prep;
+
+			iter = len / tcd->nbytes;
+			if (iter >= 1 << 15) {
+				/* len is too big */
+				goto err_prep;
+			}
+			/* citer_linkch contains the high bits of iter */
+			tcd->biter = iter & 0x1ff;
+			tcd->biter_linkch = iter >> 9;
+			tcd->citer = tcd->biter;
+			tcd->citer_linkch = tcd->biter_linkch;
 		}
-		/* citer_linkch contains the high bits of iter */
-		tcd->biter = iter & 0x1ff;
-		tcd->biter_linkch = iter >> 9;
-		tcd->citer = tcd->biter;
-		tcd->citer_linkch = tcd->biter_linkch;
 
 		tcd->e_sg = 0;
 		tcd->d_req = 1;
@@ -796,40 +820,62 @@
 	return NULL;
 }
 
+inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308)
+{
+	switch (buswidth) {
+	case 16:
+		if (is_mpc8308)
+			return false;
+	case 1:
+	case 2:
+	case 4:
+	case 32:
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 static int mpc_dma_device_config(struct dma_chan *chan,
 				 struct dma_slave_config *cfg)
 {
 	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan);
 	unsigned long flags;
 
 	/*
 	 * Software constraints:
-	 *  - only transfers between a peripheral device and
-	 *     memory are supported;
-	 *  - only peripheral devices with 4-byte FIFO access register
-	 *     are supported;
-	 *  - minimal transfer chunk is 4 bytes and consequently
-	 *     source and destination addresses must be 4-byte aligned
-	 *     and transfer size must be aligned on (4 * maxburst)
-	 *     boundary;
-	 *  - during the transfer RAM address is being incremented by
-	 *     the size of minimal transfer chunk;
-	 *  - peripheral port's address is constant during the transfer.
+	 *  - only transfers between a peripheral device and memory are
+	 *     supported
+	 *  - transfer chunk sizes of 1, 2, 4, 16 (for MPC512x), and 32 bytes
+	 *     are supported, and, consequently, source addresses and
+	 *     destination addresses must be aligned accordingly; furthermore,
+	 *     for MPC512x SoCs, the transfer size must be aligned on (chunk
+	 *     size * maxburst)
+	 *  - during the transfer, the RAM address is incremented by the size
+	 *     of transfer chunk
+	 *  - the peripheral port's address is constant during the transfer.
 	 */
 
-	if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES ||
-	    cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES ||
-	    !IS_ALIGNED(cfg->src_addr, 4) ||
-	    !IS_ALIGNED(cfg->dst_addr, 4)) {
+	if (!IS_ALIGNED(cfg->src_addr, cfg->src_addr_width) ||
+	    !IS_ALIGNED(cfg->dst_addr, cfg->dst_addr_width)) {
 		return -EINVAL;
 	}
 
+	if (!is_buswidth_valid(cfg->src_addr_width, mdma->is_mpc8308) ||
+	    !is_buswidth_valid(cfg->dst_addr_width, mdma->is_mpc8308))
+		return -EINVAL;
+
 	spin_lock_irqsave(&mchan->lock, flags);
 
 	mchan->src_per_paddr = cfg->src_addr;
 	mchan->src_tcd_nunits = cfg->src_maxburst;
+	mchan->swidth = cfg->src_addr_width;
 	mchan->dst_per_paddr = cfg->dst_addr;
 	mchan->dst_tcd_nunits = cfg->dst_maxburst;
+	mchan->dwidth = cfg->dst_addr_width;
 
 	/* Apply defaults */
 	if (mchan->src_tcd_nunits == 0)
@@ -875,7 +921,6 @@
 
 	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
 	if (!mdma) {
-		dev_err(dev, "Memory exhausted!\n");
 		retval = -ENOMEM;
 		goto err;
 	}
@@ -999,7 +1044,8 @@
 		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
 	} else {
 		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
-					MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
+						MPC_DMA_DMACR_ERGA |
+						MPC_DMA_DMACR_ERCA);
 
 		/* Disable hardware DMA requests */
 		out_be32(&mdma->regs->dmaerqh, 0);
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 3922a5d..25d1dad 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -31,6 +31,12 @@
 #include "dmaengine.h"
 #include "mv_xor.h"
 
+enum mv_xor_type {
+	XOR_ORION,
+	XOR_ARMADA_38X,
+	XOR_ARMADA_37XX,
+};
+
 enum mv_xor_mode {
 	XOR_MODE_IN_REG,
 	XOR_MODE_IN_DESC,
@@ -477,7 +483,7 @@
 	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
 
 	dev_dbg(mv_chan_to_devp(mv_chan),
-		"%s src_cnt: %d len: %u dest %pad flags: %ld\n",
+		"%s src_cnt: %d len: %zu dest %pad flags: %ld\n",
 		__func__, src_cnt, len, &dest, flags);
 
 	sw_desc = mv_chan_alloc_slot(mv_chan);
@@ -933,7 +939,7 @@
 static struct mv_xor_chan *
 mv_xor_channel_add(struct mv_xor_device *xordev,
 		   struct platform_device *pdev,
-		   int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc)
+		   int idx, dma_cap_mask_t cap_mask, int irq)
 {
 	int ret = 0;
 	struct mv_xor_chan *mv_chan;
@@ -945,7 +951,10 @@
 
 	mv_chan->idx = idx;
 	mv_chan->irq = irq;
-	mv_chan->op_in_desc = op_in_desc;
+	if (xordev->xor_type == XOR_ORION)
+		mv_chan->op_in_desc = XOR_MODE_IN_REG;
+	else
+		mv_chan->op_in_desc = XOR_MODE_IN_DESC;
 
 	dma_dev = &mv_chan->dmadev;
 
@@ -1085,6 +1094,33 @@
 	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
 }
 
+static void
+mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev)
+{
+	void __iomem *base = xordev->xor_high_base;
+	u32 win_enable = 0;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		writel(0, base + WINDOW_BASE(i));
+		writel(0, base + WINDOW_SIZE(i));
+		if (i < 4)
+			writel(0, base + WINDOW_REMAP_HIGH(i));
+	}
+	/*
+	 * For Armada3700, open the default 4GB Mbus window. The DRAM
+	 * related configuration is done at AXIS level.
+	 */
+	writel(0xffff0000, base + WINDOW_SIZE(0));
+	win_enable |= 1;
+	win_enable |= 3 << 16;
+
+	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+	writel(0, base + WINDOW_OVERRIDE_CTRL(0));
+	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
+}
+
 /*
  * Since this XOR driver is basically used only for RAID5, we don't
  * need to care about synchronizing ->suspend with DMA activity,
@@ -1129,6 +1165,11 @@
 			       XOR_INTR_MASK(mv_chan));
 	}
 
+	if (xordev->xor_type == XOR_ARMADA_37XX) {
+		mv_xor_conf_mbus_windows_a3700(xordev);
+		return 0;
+	}
+
 	dram = mv_mbus_dram_info();
 	if (dram)
 		mv_xor_conf_mbus_windows(xordev, dram);
@@ -1137,8 +1178,9 @@
 }
 
 static const struct of_device_id mv_xor_dt_ids[] = {
-	{ .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG },
-	{ .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC },
+	{ .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION },
+	{ .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X },
+	{ .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX },
 	{},
 };
 
@@ -1152,7 +1194,6 @@
 	struct resource *res;
 	unsigned int max_engines, max_channels;
 	int i, ret;
-	int op_in_desc;
 
 	dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
 
@@ -1180,12 +1221,30 @@
 
 	platform_set_drvdata(pdev, xordev);
 
+
+	/*
+	 * We need to know which type of XOR device we use before
+	 * setting up. In non-dt case it can only be the legacy one.
+	 */
+	xordev->xor_type = XOR_ORION;
+	if (pdev->dev.of_node) {
+		const struct of_device_id *of_id =
+			of_match_device(mv_xor_dt_ids,
+					&pdev->dev);
+
+		xordev->xor_type = (uintptr_t)of_id->data;
+	}
+
 	/*
 	 * (Re-)program MBUS remapping windows if we are asked to.
 	 */
-	dram = mv_mbus_dram_info();
-	if (dram)
-		mv_xor_conf_mbus_windows(xordev, dram);
+	if (xordev->xor_type == XOR_ARMADA_37XX) {
+		mv_xor_conf_mbus_windows_a3700(xordev);
+	} else {
+		dram = mv_mbus_dram_info();
+		if (dram)
+			mv_xor_conf_mbus_windows(xordev, dram);
+	}
 
 	/* Not all platforms can gate the clock, so it is not
 	 * an error if the clock does not exists.
@@ -1199,12 +1258,16 @@
 	 * order for async_tx to perform well. So we limit the number
 	 * of engines and channels so that we take into account this
 	 * constraint. Note that we also want to use channels from
-	 * separate engines when possible.
+	 * separate engines when possible.  For the dual-CPU Armada 3700 SoC
+	 * with a single XOR engine, allow using both of its channels.
 	 */
 	max_engines = num_present_cpus();
-	max_channels = min_t(unsigned int,
-			     MV_XOR_MAX_CHANNELS,
-			     DIV_ROUND_UP(num_present_cpus(), 2));
+	if (xordev->xor_type == XOR_ARMADA_37XX)
+		max_channels =	num_present_cpus();
+	else
+		max_channels = min_t(unsigned int,
+				     MV_XOR_MAX_CHANNELS,
+				     DIV_ROUND_UP(num_present_cpus(), 2));
 
 	if (mv_xor_engine_count >= max_engines)
 		return 0;
@@ -1212,15 +1275,11 @@
 	if (pdev->dev.of_node) {
 		struct device_node *np;
 		int i = 0;
-		const struct of_device_id *of_id =
-			of_match_device(mv_xor_dt_ids,
-					&pdev->dev);
 
 		for_each_child_of_node(pdev->dev.of_node, np) {
 			struct mv_xor_chan *chan;
 			dma_cap_mask_t cap_mask;
 			int irq;
-			op_in_desc = (int)of_id->data;
 
 			if (i >= max_channels)
 				continue;
@@ -1237,7 +1296,7 @@
 			}
 
 			chan = mv_xor_channel_add(xordev, pdev, i,
-						  cap_mask, irq, op_in_desc);
+						  cap_mask, irq);
 			if (IS_ERR(chan)) {
 				ret = PTR_ERR(chan);
 				irq_dispose_mapping(irq);
@@ -1266,8 +1325,7 @@
 			}
 
 			chan = mv_xor_channel_add(xordev, pdev, i,
-						  cd->cap_mask, irq,
-						  XOR_MODE_IN_REG);
+						  cd->cap_mask, irq);
 			if (IS_ERR(chan)) {
 				ret = PTR_ERR(chan);
 				goto err_channel_add;
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index c19fe30..bf56e08 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -85,6 +85,7 @@
 	void __iomem	     *xor_high_base;
 	struct clk	     *clk;
 	struct mv_xor_chan   *channels[MV_XOR_MAX_CHANNELS];
+	int		     xor_type;
 };
 
 /**
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index 1e1f298..faae0bf 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -240,8 +240,9 @@
 	struct of_phandle_args	dma_spec;
 	struct of_dma		*ofdma;
 	struct dma_chan		*chan;
-	int			count, i;
+	int			count, i, start;
 	int			ret_no_channel = -ENODEV;
+	static atomic_t		last_index;
 
 	if (!np || !name) {
 		pr_err("%s: not enough information provided\n", __func__);
@@ -259,8 +260,15 @@
 		return ERR_PTR(-ENODEV);
 	}
 
+	/*
+	 * approximate an average distribution across multiple
+	 * entries with the same name
+	 */
+	start = atomic_inc_return(&last_index);
 	for (i = 0; i < count; i++) {
-		if (of_dma_match_channel(np, name, i, &dma_spec))
+		if (of_dma_match_channel(np, name,
+					 (i + start) % count,
+					 &dma_spec))
 			continue;
 
 		mutex_lock(&of_dma_lock);
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index 77c1c44..e756a30c 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -117,6 +117,7 @@
 	/* protected by vc->lock */
 	struct pxad_phy		*phy;
 	struct dma_pool		*desc_pool;	/* Descriptors pool */
+	dma_cookie_t		bus_error;
 };
 
 struct pxad_device {
@@ -563,6 +564,7 @@
 			return;
 		}
 	}
+	chan->bus_error = 0;
 
 	/*
 	 * Program the descriptor's address into the DMA controller,
@@ -666,6 +668,7 @@
 	struct virt_dma_desc *vd, *tmp;
 	unsigned int dcsr;
 	unsigned long flags;
+	dma_cookie_t last_started = 0;
 
 	BUG_ON(!chan);
 
@@ -678,6 +681,7 @@
 		dev_dbg(&chan->vc.chan.dev->device,
 			"%s(): checking txd %p[%x]: completed=%d\n",
 			__func__, vd, vd->tx.cookie, is_desc_completed(vd));
+		last_started = vd->tx.cookie;
 		if (to_pxad_sw_desc(vd)->cyclic) {
 			vchan_cyclic_callback(vd);
 			break;
@@ -690,7 +694,12 @@
 		}
 	}
 
-	if (dcsr & PXA_DCSR_STOPSTATE) {
+	if (dcsr & PXA_DCSR_BUSERR) {
+		chan->bus_error = last_started;
+		phy_disable(phy);
+	}
+
+	if (!chan->bus_error && dcsr & PXA_DCSR_STOPSTATE) {
 		dev_dbg(&chan->vc.chan.dev->device,
 		"%s(): channel stopped, submitted_empty=%d issued_empty=%d",
 			__func__,
@@ -1249,6 +1258,9 @@
 	struct pxad_chan *chan = to_pxad_chan(dchan);
 	enum dma_status ret;
 
+	if (cookie == chan->bus_error)
+		return DMA_ERROR;
+
 	ret = dma_cookie_status(dchan, cookie, txstate);
 	if (likely(txstate && (ret != DMA_ERROR)))
 		dma_set_residue(txstate, pxad_residue(chan, cookie));
@@ -1321,7 +1333,7 @@
 	return 0;
 }
 
-static const struct of_device_id const pxad_dt_ids[] = {
+static const struct of_device_id pxad_dt_ids[] = {
 	{ .compatible = "marvell,pdma-1.0", },
 	{}
 };
diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile
index bfea699..4bfc38b 100644
--- a/drivers/dma/qcom/Makefile
+++ b/drivers/dma/qcom/Makefile
@@ -1,3 +1,5 @@
 obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o
 obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o
 hdma_mgmt-objs	 := hidma_mgmt.o hidma_mgmt_sys.o
+obj-$(CONFIG_QCOM_HIDMA) +=  hdma.o
+hdma-objs        := hidma_ll.o hidma.o hidma_dbg.o
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index d5e0a9c..969b481 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -342,7 +342,7 @@
 
 #define BAM_DESC_FIFO_SIZE	SZ_32K
 #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
-#define BAM_MAX_DATA_SIZE	(SZ_32K - 8)
+#define BAM_FIFO_SIZE	(SZ_32K - 8)
 
 struct bam_chan {
 	struct virt_dma_chan vc;
@@ -387,6 +387,7 @@
 
 	/* execution environment ID, from DT */
 	u32 ee;
+	bool controlled_remotely;
 
 	const struct reg_offset_data *layout;
 
@@ -458,7 +459,7 @@
 	 */
 	writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
 			bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR));
-	writel_relaxed(BAM_DESC_FIFO_SIZE,
+	writel_relaxed(BAM_FIFO_SIZE,
 			bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES));
 
 	/* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
@@ -604,7 +605,7 @@
 
 	/* calculate number of required entries */
 	for_each_sg(sgl, sg, sg_len, i)
-		num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_MAX_DATA_SIZE);
+		num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE);
 
 	/* allocate enough room to accomodate the number of entries */
 	async_desc = kzalloc(sizeof(*async_desc) +
@@ -635,10 +636,10 @@
 			desc->addr = cpu_to_le32(sg_dma_address(sg) +
 						 curr_offset);
 
-			if (remainder > BAM_MAX_DATA_SIZE) {
-				desc->size = cpu_to_le16(BAM_MAX_DATA_SIZE);
-				remainder -= BAM_MAX_DATA_SIZE;
-				curr_offset += BAM_MAX_DATA_SIZE;
+			if (remainder > BAM_FIFO_SIZE) {
+				desc->size = cpu_to_le16(BAM_FIFO_SIZE);
+				remainder -= BAM_FIFO_SIZE;
+				curr_offset += BAM_FIFO_SIZE;
 			} else {
 				desc->size = cpu_to_le16(remainder);
 				remainder = 0;
@@ -801,13 +802,17 @@
 	if (srcs & P_IRQ)
 		tasklet_schedule(&bdev->task);
 
-	if (srcs & BAM_IRQ)
+	if (srcs & BAM_IRQ) {
 		clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS));
 
-	/* don't allow reorder of the various accesses to the BAM registers */
-	mb();
+		/*
+		 * don't allow reorder of the various accesses to the BAM
+		 * registers
+		 */
+		mb();
 
-	writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
+		writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
+	}
 
 	return IRQ_HANDLED;
 }
@@ -1038,6 +1043,9 @@
 	val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
 	bdev->num_channels = val & BAM_NUM_PIPES_MASK;
 
+	if (bdev->controlled_remotely)
+		return 0;
+
 	/* s/w reset bam */
 	/* after reset all pipes are disabled and idle */
 	val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
@@ -1125,6 +1133,9 @@
 		return ret;
 	}
 
+	bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node,
+						"qcom,controlled-remotely");
+
 	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
 	if (IS_ERR(bdev->bamclk))
 		return PTR_ERR(bdev->bamclk);
@@ -1163,7 +1174,7 @@
 	/* set max dma segment size */
 	bdev->common.dev = bdev->dev;
 	bdev->common.dev->dma_parms = &bdev->dma_parms;
-	ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE);
+	ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE);
 	if (ret) {
 		dev_err(bdev->dev, "cannot set maximum segment size\n");
 		goto err_bam_channel_exit;
@@ -1234,6 +1245,9 @@
 		bam_dma_terminate_all(&bdev->channels[i].vc.chan);
 		tasklet_kill(&bdev->channels[i].vc.task);
 
+		if (!bdev->channels[i].fifo_virt)
+			continue;
+
 		dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
 			    bdev->channels[i].fifo_virt,
 			    bdev->channels[i].fifo_phys);
diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
index cccc78e..41b5c6d 100644
--- a/drivers/dma/qcom/hidma.c
+++ b/drivers/dma/qcom/hidma.c
@@ -1,7 +1,7 @@
 /*
  * Qualcomm Technologies HIDMA DMA engine interface
  *
- * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -404,7 +404,7 @@
 	spin_unlock_irqrestore(&mchan->lock, irqflags);
 
 	/* this suspends the existing transfer */
-	rc = hidma_ll_pause(dmadev->lldev);
+	rc = hidma_ll_disable(dmadev->lldev);
 	if (rc) {
 		dev_err(dmadev->ddev.dev, "channel did not pause\n");
 		goto out;
@@ -427,7 +427,7 @@
 		list_move(&mdesc->node, &mchan->free);
 	}
 
-	rc = hidma_ll_resume(dmadev->lldev);
+	rc = hidma_ll_enable(dmadev->lldev);
 out:
 	pm_runtime_mark_last_busy(dmadev->ddev.dev);
 	pm_runtime_put_autosuspend(dmadev->ddev.dev);
@@ -488,7 +488,7 @@
 	dmadev = to_hidma_dev(mchan->chan.device);
 	if (!mchan->paused) {
 		pm_runtime_get_sync(dmadev->ddev.dev);
-		if (hidma_ll_pause(dmadev->lldev))
+		if (hidma_ll_disable(dmadev->lldev))
 			dev_warn(dmadev->ddev.dev, "channel did not stop\n");
 		mchan->paused = true;
 		pm_runtime_mark_last_busy(dmadev->ddev.dev);
@@ -507,7 +507,7 @@
 	dmadev = to_hidma_dev(mchan->chan.device);
 	if (mchan->paused) {
 		pm_runtime_get_sync(dmadev->ddev.dev);
-		rc = hidma_ll_resume(dmadev->lldev);
+		rc = hidma_ll_enable(dmadev->lldev);
 		if (!rc)
 			mchan->paused = false;
 		else
@@ -530,6 +530,43 @@
 	return hidma_ll_inthandler(chirq, lldev);
 }
 
+static ssize_t hidma_show_values(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct hidma_dev *mdev = platform_get_drvdata(pdev);
+
+	buf[0] = 0;
+
+	if (strcmp(attr->attr.name, "chid") == 0)
+		sprintf(buf, "%d\n", mdev->chidx);
+
+	return strlen(buf);
+}
+
+static int hidma_create_sysfs_entry(struct hidma_dev *dev, char *name,
+				    int mode)
+{
+	struct device_attribute *attrs;
+	char *name_copy;
+
+	attrs = devm_kmalloc(dev->ddev.dev, sizeof(struct device_attribute),
+			     GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
+	name_copy = devm_kstrdup(dev->ddev.dev, name, GFP_KERNEL);
+	if (!name_copy)
+		return -ENOMEM;
+
+	attrs->attr.name = name_copy;
+	attrs->attr.mode = mode;
+	attrs->show = hidma_show_values;
+	sysfs_attr_init(&attrs->attr);
+
+	return device_create_file(dev->ddev.dev, attrs);
+}
+
 static int hidma_probe(struct platform_device *pdev)
 {
 	struct hidma_dev *dmadev;
@@ -644,6 +681,8 @@
 
 	dmadev->irq = chirq;
 	tasklet_init(&dmadev->task, hidma_issue_task, (unsigned long)dmadev);
+	hidma_debug_init(dmadev);
+	hidma_create_sysfs_entry(dmadev, "chid", S_IRUGO);
 	dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n");
 	platform_set_drvdata(pdev, dmadev);
 	pm_runtime_mark_last_busy(dmadev->ddev.dev);
@@ -651,6 +690,7 @@
 	return 0;
 
 uninit:
+	hidma_debug_uninit(dmadev);
 	hidma_ll_uninit(dmadev->lldev);
 dmafree:
 	if (dmadev)
@@ -668,6 +708,7 @@
 	pm_runtime_get_sync(dmadev->ddev.dev);
 	dma_async_device_unregister(&dmadev->ddev);
 	devm_free_irq(dmadev->ddev.dev, dmadev->irq, dmadev->lldev);
+	hidma_debug_uninit(dmadev);
 	hidma_ll_uninit(dmadev->lldev);
 	hidma_free(dmadev);
 
@@ -689,7 +730,6 @@
 	{.compatible = "qcom,hidma-1.0",},
 	{},
 };
-
 MODULE_DEVICE_TABLE(of, hidma_match);
 
 static struct platform_driver hidma_driver = {
diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h
index 231e306..db413a5 100644
--- a/drivers/dma/qcom/hidma.h
+++ b/drivers/dma/qcom/hidma.h
@@ -1,7 +1,7 @@
 /*
  * Qualcomm Technologies HIDMA data structures
  *
- * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -20,32 +20,29 @@
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 
-#define TRE_SIZE			32 /* each TRE is 32 bytes  */
-#define TRE_CFG_IDX			0
-#define TRE_LEN_IDX			1
-#define TRE_SRC_LOW_IDX		2
-#define TRE_SRC_HI_IDX			3
-#define TRE_DEST_LOW_IDX		4
-#define TRE_DEST_HI_IDX		5
-
-struct hidma_tx_status {
-	u8 err_info;			/* error record in this transfer    */
-	u8 err_code;			/* completion code		    */
-};
+#define HIDMA_TRE_SIZE			32 /* each TRE is 32 bytes  */
+#define HIDMA_TRE_CFG_IDX		0
+#define HIDMA_TRE_LEN_IDX		1
+#define HIDMA_TRE_SRC_LOW_IDX		2
+#define HIDMA_TRE_SRC_HI_IDX		3
+#define HIDMA_TRE_DEST_LOW_IDX		4
+#define HIDMA_TRE_DEST_HI_IDX		5
 
 struct hidma_tre {
 	atomic_t allocated;		/* if this channel is allocated	    */
 	bool queued;			/* flag whether this is pending     */
 	u16 status;			/* status			    */
-	u32 chidx;			/* index of the tre		    */
+	u32 idx;			/* index of the tre		    */
 	u32 dma_sig;			/* signature of the tre		    */
 	const char *dev_name;		/* name of the device		    */
 	void (*callback)(void *data);	/* requester callback		    */
 	void *data;			/* Data associated with this channel*/
 	struct hidma_lldev *lldev;	/* lldma device pointer		    */
-	u32 tre_local[TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy        */
+	u32 tre_local[HIDMA_TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy  */
 	u32 tre_index;			/* the offset where this was written*/
 	u32 int_flags;			/* interrupt flags		    */
+	u8 err_info;			/* error record in this transfer    */
+	u8 err_code;			/* completion code		    */
 };
 
 struct hidma_lldev {
@@ -61,22 +58,21 @@
 	void __iomem *evca;		/* Event Channel address          */
 	struct hidma_tre
 		**pending_tre_list;	/* Pointers to pending TREs	  */
-	struct hidma_tx_status
-		*tx_status_list;	/* Pointers to pending TREs status*/
 	s32 pending_tre_count;		/* Number of TREs pending	  */
 
 	void *tre_ring;			/* TRE ring			  */
-	dma_addr_t tre_ring_handle;	/* TRE ring to be shared with HW  */
+	dma_addr_t tre_dma;		/* TRE ring to be shared with HW  */
 	u32 tre_ring_size;		/* Byte size of the ring	  */
 	u32 tre_processed_off;		/* last processed TRE		  */
 
 	void *evre_ring;		/* EVRE ring			   */
-	dma_addr_t evre_ring_handle;	/* EVRE ring to be shared with HW  */
+	dma_addr_t evre_dma;		/* EVRE ring to be shared with HW  */
 	u32 evre_ring_size;		/* Byte size of the ring	   */
 	u32 evre_processed_off;		/* last processed EVRE		   */
 
 	u32 tre_write_offset;           /* TRE write location              */
 	struct tasklet_struct task;	/* task delivering notifications   */
+	struct tasklet_struct rst_task;	/* task to reset HW                */
 	DECLARE_KFIFO_PTR(handoff_fifo,
 		struct hidma_tre *);    /* pending TREs FIFO               */
 };
@@ -145,8 +141,8 @@
 bool hidma_ll_isenabled(struct hidma_lldev *llhndl);
 void hidma_ll_queue_request(struct hidma_lldev *llhndl, u32 tre_ch);
 void hidma_ll_start(struct hidma_lldev *llhndl);
-int hidma_ll_pause(struct hidma_lldev *llhndl);
-int hidma_ll_resume(struct hidma_lldev *llhndl);
+int hidma_ll_disable(struct hidma_lldev *lldev);
+int hidma_ll_enable(struct hidma_lldev *llhndl);
 void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch,
 	dma_addr_t src, dma_addr_t dest, u32 len, u32 flags);
 int hidma_ll_setup(struct hidma_lldev *lldev);
@@ -157,4 +153,6 @@
 irqreturn_t hidma_ll_inthandler(int irq, void *arg);
 void hidma_cleanup_pending_tre(struct hidma_lldev *llhndl, u8 err_info,
 				u8 err_code);
+int hidma_debug_init(struct hidma_dev *dmadev);
+void hidma_debug_uninit(struct hidma_dev *dmadev);
 #endif
diff --git a/drivers/dma/qcom/hidma_dbg.c b/drivers/dma/qcom/hidma_dbg.c
new file mode 100644
index 0000000..fa827e5
--- /dev/null
+++ b/drivers/dma/qcom/hidma_dbg.c
@@ -0,0 +1,217 @@
+/*
+ * Qualcomm Technologies HIDMA debug file
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/pm_runtime.h>
+
+#include "hidma.h"
+
+static void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
+{
+	struct hidma_lldev *ll = llhndl;
+	struct hidma_tre *entry;
+	dma_addr_t src, dst;
+	u32 *words;
+	u32 len;
+
+	/* dump TRE slot @tre_ch into @s; an out-of-range slot is only logged */
+	if (tre_ch >= ll->nr_tres) {
+		dev_err(ll->dev, "invalid TRE number in chstats:%d", tre_ch);
+		return;
+	}
+	entry = &ll->trepool[tre_ch];
+	seq_printf(s, "------Channel %d -----\n", tre_ch);
+	seq_printf(s, "allocated=%d\n", atomic_read(&entry->allocated));
+	seq_printf(s, "queued = 0x%x\n", entry->queued);
+	seq_printf(s, "err_info = 0x%x\n", entry->err_info);
+	seq_printf(s, "err_code = 0x%x\n", entry->err_code);
+	seq_printf(s, "status = 0x%x\n", entry->status);
+	seq_printf(s, "idx = 0x%x\n", entry->idx);
+	seq_printf(s, "dma_sig = 0x%x\n", entry->dma_sig);
+	seq_printf(s, "dev_name=%s\n", entry->dev_name);
+	seq_printf(s, "callback=%p\n", entry->callback);
+	seq_printf(s, "data=%p\n", entry->data);
+	seq_printf(s, "tre_index = 0x%x\n", entry->tre_index);
+
+	words = &entry->tre_local[0];
+	src = words[HIDMA_TRE_SRC_LOW_IDX];
+	src = ((u64)words[HIDMA_TRE_SRC_HI_IDX] << 32) + src;
+	dst = words[HIDMA_TRE_DEST_LOW_IDX];
+	dst += (u64)words[HIDMA_TRE_DEST_HI_IDX] << 32;
+	len = words[HIDMA_TRE_LEN_IDX];
+
+	seq_printf(s, "src=%pap\n", &src);
+	seq_printf(s, "dest=%pap\n", &dst);
+	seq_printf(s, "length = 0x%x\n", len);
+}
+
+static void hidma_ll_devstats(struct seq_file *s, void *llhndl)
+{
+	struct hidma_lldev *ll = llhndl;	/* opaque handle is the lldev */
+
+	seq_puts(s, "------Device -----\n");
+	seq_printf(s, "lldev init = 0x%x\n", ll->initialized);
+	seq_printf(s, "trch_state = 0x%x\n", ll->trch_state);
+	seq_printf(s, "evch_state = 0x%x\n", ll->evch_state);
+	seq_printf(s, "chidx = 0x%x\n", ll->chidx);
+	seq_printf(s, "nr_tres = 0x%x\n", ll->nr_tres);
+	seq_printf(s, "trca=%p\n", ll->trca);
+	seq_printf(s, "tre_ring=%p\n", ll->tre_ring);
+	seq_printf(s, "tre_ring_handle=%pap\n", &ll->tre_dma);
+	seq_printf(s, "tre_ring_size = 0x%x\n", ll->tre_ring_size);
+	seq_printf(s, "tre_processed_off = 0x%x\n", ll->tre_processed_off);
+	seq_printf(s, "pending_tre_count=%d\n", ll->pending_tre_count);
+	seq_printf(s, "evca=%p\n", ll->evca);
+	seq_printf(s, "evre_ring=%p\n", ll->evre_ring);
+	seq_printf(s, "evre_ring_handle=%pap\n", &ll->evre_dma);
+	seq_printf(s, "evre_ring_size = 0x%x\n", ll->evre_ring_size);
+	seq_printf(s, "evre_processed_off = 0x%x\n", ll->evre_processed_off);
+	seq_printf(s, "tre_write_offset = 0x%x\n", ll->tre_write_offset);
+}
+
+/*
+ * hidma_chan_stats: seq_file show callback behind a channel's "stats" file.
+ * Dumps the TREs on each descriptor list, then the low-level device state.
+ */
+static int hidma_chan_stats(struct seq_file *s, void *unused)
+{
+	struct hidma_chan *mchan = s->private;
+	struct hidma_dev *dmadev = mchan->dmadev;
+	struct device *dev = dmadev->ddev.dev;
+	struct hidma_desc *d;
+
+	pm_runtime_get_sync(dev);
+	seq_printf(s, "paused=%u\n", mchan->paused);
+	seq_printf(s, "dma_sig=%u\n", mchan->dma_sig);
+	seq_puts(s, "prepared\n");
+	list_for_each_entry(d, &mchan->prepared, node)
+		hidma_ll_chstats(s, dmadev->lldev, d->tre_ch);
+
+	seq_puts(s, "active\n");
+	list_for_each_entry(d, &mchan->active, node)
+		hidma_ll_chstats(s, dmadev->lldev, d->tre_ch);
+
+	seq_puts(s, "completed\n");
+	list_for_each_entry(d, &mchan->completed, node)
+		hidma_ll_chstats(s, dmadev->lldev, d->tre_ch);
+
+	hidma_ll_devstats(s, dmadev->lldev);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+	return 0;
+}
+
+/*
+ * hidma_dma_info: seq_file show callback for the device-level "stats" file.
+ * NOTE(review): dev_trca/dev_evca print the address of the dmadev member
+ * itself (&dmadev->dev_trca), not the value stored in it - confirm intent.
+ */
+static int hidma_dma_info(struct seq_file *s, void *unused)
+{
+	struct hidma_dev *dmadev = s->private;
+	resource_size_t sz;
+
+	seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors);
+	seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca);
+	seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->trca_resource->start);
+	sz = resource_size(dmadev->trca_resource);
+	seq_printf(s, "dev_trca_size=%pa\n", &sz);
+	seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca);
+	seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->evca_resource->start);
+	sz = resource_size(dmadev->evca_resource);
+	seq_printf(s, "dev_evca_size=%pa\n", &sz);
+	return 0;
+}
+
+static int hidma_chan_stats_open(struct inode *inode, struct file *file)
+{	/* i_private is passed on and read back as s->private by the show */
+	return single_open(file, hidma_chan_stats, inode->i_private);
+}
+
+static int hidma_dma_info_open(struct inode *inode, struct file *file)
+{	/* i_private is passed on and read back as s->private by the show */
+	return single_open(file, hidma_dma_info, inode->i_private);
+}
+
+static const struct file_operations hidma_chan_fops = {
+	.open = hidma_chan_stats_open,	/* per-channel "stats" file */
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static const struct file_operations hidma_dma_fops = {
+	.open = hidma_dma_info_open,	/* device-level "stats" file */
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void hidma_debug_uninit(struct hidma_dev *dmadev)
+{
+	/* ->stats is a child of ->debugfs, so this one call removes both */
+	debugfs_remove_recursive(dmadev->debugfs);
+}
+
+/*
+ * hidma_debug_init: create the debugfs hierarchy of one HIDMA device.
+ *
+ * Returns 0 on success, -ENODEV if the root directory cannot be created
+ * and -ENOMEM (after removing what was created) on any later failure.
+ */
+int hidma_debug_init(struct hidma_dev *dmadev)
+{
+	struct hidma_chan *mchan;
+	int nr = 0;
+
+	/*
+	 * Layout: <device name>/chanN/stats for every virtual channel plus
+	 * <device name>/stats for the device itself.
+	 */
+	dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL);
+	if (!dmadev->debugfs)
+		return -ENODEV;
+
+	/* walk through the virtual channel list, numbering in list order */
+	list_for_each_entry(mchan, &dmadev->ddev.channels, chan.device_node) {
+		sprintf(mchan->dbg_name, "chan%d", nr);
+		mchan->debugfs = debugfs_create_dir(mchan->dbg_name,
+						    dmadev->debugfs);
+		if (!mchan->debugfs)
+			goto err_remove;
+
+		mchan->stats = debugfs_create_file("stats", S_IRUGO,
+						   mchan->debugfs, mchan,
+						   &hidma_chan_fops);
+		if (!mchan->stats)
+			goto err_remove;
+
+		nr++;
+	}
+
+	dmadev->stats = debugfs_create_file("stats", S_IRUGO,
+					    dmadev->debugfs, dmadev,
+					    &hidma_dma_fops);
+	if (!dmadev->stats)
+		goto err_remove;
+
+	return 0;
+
+err_remove:
+	hidma_debug_uninit(dmadev);
+	return -ENOMEM;
+}
diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
new file mode 100644
index 0000000..f392900
--- /dev/null
+++ b/drivers/dma/qcom/hidma_ll.c
@@ -0,0 +1,872 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine low level code
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+#include <linux/iopoll.h>
+#include <linux/kfifo.h>
+#include <linux/bitops.h>
+
+#include "hidma.h"
+
+#define HIDMA_EVRE_SIZE			16	/* each EVRE is 16 bytes */
+
+#define HIDMA_TRCA_CTRLSTS_REG			0x000
+#define HIDMA_TRCA_RING_LOW_REG		0x008
+#define HIDMA_TRCA_RING_HIGH_REG		0x00C
+#define HIDMA_TRCA_RING_LEN_REG		0x010
+#define HIDMA_TRCA_DOORBELL_REG		0x400
+
+#define HIDMA_EVCA_CTRLSTS_REG			0x000
+#define HIDMA_EVCA_INTCTRL_REG			0x004
+#define HIDMA_EVCA_RING_LOW_REG		0x008
+#define HIDMA_EVCA_RING_HIGH_REG		0x00C
+#define HIDMA_EVCA_RING_LEN_REG		0x010
+#define HIDMA_EVCA_WRITE_PTR_REG		0x020
+#define HIDMA_EVCA_DOORBELL_REG		0x400
+
+#define HIDMA_EVCA_IRQ_STAT_REG		0x100
+#define HIDMA_EVCA_IRQ_CLR_REG			0x108
+#define HIDMA_EVCA_IRQ_EN_REG			0x110
+
+#define HIDMA_EVRE_CFG_IDX			0
+
+#define HIDMA_EVRE_ERRINFO_BIT_POS		24
+#define HIDMA_EVRE_CODE_BIT_POS		28
+
+#define HIDMA_EVRE_ERRINFO_MASK		GENMASK(3, 0)
+#define HIDMA_EVRE_CODE_MASK			GENMASK(3, 0)
+
+#define HIDMA_CH_CONTROL_MASK			GENMASK(7, 0)
+#define HIDMA_CH_STATE_MASK			GENMASK(7, 0)
+#define HIDMA_CH_STATE_BIT_POS			0x8
+
+#define HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS	0
+#define HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS	1
+#define HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS	9
+#define HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS	10
+#define HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS	11
+#define HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS	14
+
+#define ENABLE_IRQS (BIT(HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS))
+
+#define HIDMA_INCREMENT_ITERATOR(iter, size, ring_size)	\
+do {	/* advance a ring offset by size, wrapping at ring_size */	\
+	(iter) += (size);					\
+	if ((iter) >= (ring_size))				\
+		(iter) -= (ring_size);				\
+} while (0)
+
+#define HIDMA_CH_STATE(val)	\
+	(((val) >> HIDMA_CH_STATE_BIT_POS) & HIDMA_CH_STATE_MASK)
+
+#define HIDMA_ERR_INT_MASK				\
+	(BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS)   |	\
+	 BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) |	\
+	 BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS)	    |	\
+	 BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS)    |	\
+	 BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS))
+
+enum ch_command {	/* command codes written into CTRLSTS at bit 16 */
+	HIDMA_CH_DISABLE = 0,
+	HIDMA_CH_ENABLE = 1,
+	HIDMA_CH_SUSPEND = 2,
+	HIDMA_CH_RESET = 9,
+};
+
+enum ch_state {	/* values of the state field decoded by HIDMA_CH_STATE() */
+	HIDMA_CH_DISABLED = 0,
+	HIDMA_CH_ENABLED = 1,
+	HIDMA_CH_RUNNING = 2,
+	HIDMA_CH_SUSPENDED = 3,
+	HIDMA_CH_STOPPED = 4,
+};
+
+enum tre_type {	/* stored in the TRE config word by hidma_ll_request() */
+	HIDMA_TRE_MEMCPY = 3,
+};
+
+enum err_code {	/* EVRE status code bits, tested in hidma_ll_status() */
+	HIDMA_EVRE_STATUS_COMPLETE = 1,
+	HIDMA_EVRE_STATUS_ERROR = 4,
+};
+
+/* Returns 1 when @state is one of the two "channel is up" states, else 0. */
+static int hidma_is_chan_enabled(int state)
+{
+	int enabled;
+
+	/* ENABLED and RUNNING both count; every other state does not */
+	enabled = (state == HIDMA_CH_ENABLED) || (state == HIDMA_CH_RUNNING);
+
+	return enabled;
+}
+
+/* Hand TRE slot @tre_ch back to the pool; misuse is logged and ignored. */
+void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	atomic_t *in_use;
+
+	if (tre_ch >= lldev->nr_tres) {
+		dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
+		return;
+	}
+
+	in_use = &lldev->trepool[tre_ch].allocated;
+	if (atomic_read(in_use) == 1) {
+		atomic_set(in_use, 0);
+		return;
+	}
+	dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch);
+}
+
+int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name,
+		     void (*callback)(void *data), void *data, u32 *tre_ch)
+{
+	struct hidma_tre *slot;
+	unsigned int idx, limit;
+
+	if (!lldev || !tre_ch)
+		return -EINVAL;
+
+	/* need to have at least one empty spot: the last entry is never used */
+	limit = lldev->nr_tres - 1;
+	for (idx = 0; idx < limit; idx++)
+		if (atomic_add_unless(&lldev->trepool[idx].allocated, 1, 1))
+			break;
+	if (idx == limit)
+		return -ENOMEM;
+
+	/* the slot was claimed (0 -> 1); reset it for its new owner */
+	slot = &lldev->trepool[idx];
+	slot->lldev = lldev;
+	slot->idx = idx;
+	slot->dma_sig = sig;
+	slot->dev_name = dev_name;
+	slot->callback = callback;
+	slot->data = data;
+	slot->status = 0;
+	slot->queued = 0;
+	slot->err_code = 0;
+	slot->err_info = 0;
+	/* config word: TRE type, channel index in bits 15:8, bit 16 = IEOB */
+	slot->tre_local[HIDMA_TRE_CFG_IDX] = HIDMA_TRE_MEMCPY |
+					     ((lldev->chidx & 0xFF) << 8) |
+					     BIT(16);
+	*tre_ch = idx;
+	/* a non-NULL callback is also invoked once here, at request time */
+	if (callback)
+		callback(data);
+	return 0;
+}
+
+/*
+ * Tasklet body: drain the handoff FIFO filled by hidma_post_completed()
+ * and run the callback of every TRE found there.
+ */
+static void hidma_ll_tre_complete(unsigned long arg)
+{
+	struct hidma_lldev *ll = (struct hidma_lldev *)arg;
+	struct hidma_tre *done;
+
+	/* several completions may have piled up since the last run */
+	while (kfifo_out(&ll->handoff_fifo, &done, 1) != 0)
+		if (done->callback)
+			done->callback(done->data);
+}
+
+static int hidma_post_completed(struct hidma_lldev *lldev, int tre_iterator,
+				u8 err_info, u8 err_code)
+{	/* retire the TRE queued at ring byte offset @tre_iterator */
+	struct hidma_tre *tre;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lldev->lock, flags);
+	tre = lldev->pending_tre_list[tre_iterator / HIDMA_TRE_SIZE];
+	if (!tre) {
+		spin_unlock_irqrestore(&lldev->lock, flags);
+		dev_warn(lldev->dev, "tre_index [%d] and tre out of sync\n",
+			 tre_iterator / HIDMA_TRE_SIZE);
+		return -EINVAL;	/* nothing was queued at this ring slot */
+	}
+	lldev->pending_tre_list[tre->tre_index] = NULL;
+
+	/*
+	 * Keep track of pending TREs that SW is expecting to receive
+	 * from HW. We got one now. Decrement our counter.
+	 */
+	lldev->pending_tre_count--;
+	if (lldev->pending_tre_count < 0) {
+		dev_warn(lldev->dev, "tre count mismatch on completion");
+		lldev->pending_tre_count = 0;
+	}
+
+	spin_unlock_irqrestore(&lldev->lock, flags);
+
+	tre->err_info = err_info;
+	tre->err_code = err_code;
+	tre->queued = 0;
+
+	kfifo_put(&lldev->handoff_fifo, tre);	/* drained by lldev->task */
+	tasklet_schedule(&lldev->task);
+
+	return 0;
+}
+
+/*
+ * Called to handle the interrupt for the channel.
+ * Walks the EVRE ring from the last processed offset up to the hardware
+ * write pointer, completing one pending TRE per EVRE.
+ * Returns the number of EVREs consumed; 0 if none or on a bad write offset.
+ */
+static int hidma_handle_tre_completion(struct hidma_lldev *lldev)
+{
+	u32 evre_ring_size = lldev->evre_ring_size;
+	u32 tre_ring_size = lldev->tre_ring_size;
+	u32 err_info, err_code, evre_write_off;
+	u32 tre_iterator, evre_iterator;
+	u32 num_completed = 0;
+
+	evre_write_off = readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG);
+	tre_iterator = lldev->tre_processed_off;
+	evre_iterator = lldev->evre_processed_off;
+
+	if ((evre_write_off > evre_ring_size) ||
+	    (evre_write_off % HIDMA_EVRE_SIZE)) {
+		dev_err(lldev->dev, "HW reports invalid EVRE write offset\n");
+		return 0;
+	}
+
+	/*
+	 * By the time control reaches here the number of EVREs and TREs
+	 * may not match. Only consume the ones that hardware told us.
+	 */
+	while ((evre_iterator != evre_write_off)) {
+		u32 *current_evre = lldev->evre_ring + evre_iterator;
+		u32 cfg;
+
+		cfg = current_evre[HIDMA_EVRE_CFG_IDX];
+		err_info = cfg >> HIDMA_EVRE_ERRINFO_BIT_POS;
+		err_info &= HIDMA_EVRE_ERRINFO_MASK;
+		err_code =
+		    (cfg >> HIDMA_EVRE_CODE_BIT_POS) & HIDMA_EVRE_CODE_MASK;
+
+		if (hidma_post_completed(lldev, tre_iterator, err_info,
+					 err_code))
+			break;	/* no TRE queued at this slot: stop here */
+
+		HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE,
+					 tre_ring_size);
+		HIDMA_INCREMENT_ITERATOR(evre_iterator, HIDMA_EVRE_SIZE,
+					 evre_ring_size);
+
+		/*
+		 * Read the new event descriptor written by the HW.
+		 * As we are processing the delivered events, other events
+		 * get queued to the SW for processing.
+		 */
+		evre_write_off =
+		    readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG);
+		num_completed++;
+	}
+
+	if (num_completed) {
+		u32 evre_read_off = (lldev->evre_processed_off +
+				     HIDMA_EVRE_SIZE * num_completed);
+		u32 tre_read_off = (lldev->tre_processed_off +
+				    HIDMA_TRE_SIZE * num_completed);
+
+		evre_read_off = evre_read_off % evre_ring_size;
+		tre_read_off = tre_read_off % tre_ring_size;
+
+		writel(evre_read_off, lldev->evca + HIDMA_EVCA_DOORBELL_REG);
+
+		/* record the last processed tre offset */
+		lldev->tre_processed_off = tre_read_off;
+		lldev->evre_processed_off = evre_read_off;
+	}
+
+	return num_completed;
+}
+
+void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info,
+			       u8 err_code)
+{
+	u32 tre_iterator;
+	u32 tre_ring_size = lldev->tre_ring_size;
+	int num_completed = 0;
+	u32 tre_read_off;
+
+	tre_iterator = lldev->tre_processed_off;
+	while (lldev->pending_tre_count) {
+		if (hidma_post_completed(lldev, tre_iterator, err_info,
+					 err_code))
+			break;
+		HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE,
+					 tre_ring_size);
+		num_completed++;
+	}
+	tre_read_off = (lldev->tre_processed_off +
+			HIDMA_TRE_SIZE * num_completed);
+
+	tre_read_off = tre_read_off % tre_ring_size;
+
+	/* record the last processed tre offset */
+	lldev->tre_processed_off = tre_read_off;
+}
+
+static int hidma_ll_reset(struct hidma_lldev *lldev)
+{	/* reset the transfer channel first, then the event channel */
+	u32 val;
+	int ret;
+
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_RESET << 16;
+	writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+	/*
+	 * Give the DMA logic up to 10ms to quiesce after the reset command:
+	 * poll the state field every 1ms until it reads back as disabled.
+	 */
+	ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED,
+				 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "transfer channel did not reset\n");
+		return ret;
+	}
+
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_RESET << 16;
+	writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+	/*
+	 * Same wait for the event channel: poll every 1ms, for at most
+	 * 10ms. A timeout here is returned without logging.
+	 */
+	ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED,
+				 1000, 10000);
+	if (ret)
+		return ret;
+
+	lldev->trch_state = HIDMA_CH_DISABLED;
+	lldev->evch_state = HIDMA_CH_DISABLED;
+	return 0;
+}
+
+/*
+ * Reset tasklet: fail whatever is still pending, then bring the channel
+ * back up through hidma_ll_setup() and re-arm the interrupts.
+ */
+static void hidma_ll_abort(unsigned long arg)
+{
+	struct hidma_lldev *ll = (struct hidma_lldev *)arg;
+
+	/* pending TREs complete with err_info 0xFF and an ERROR status code */
+	hidma_cleanup_pending_tre(ll, 0xFF, HIDMA_EVRE_STATUS_ERROR);
+
+	/* reset the channel for recovery */
+	if (hidma_ll_setup(ll)) {
+		dev_err(ll->dev, "channel reinitialize failed after error\n");
+		return;
+	}
+
+	/* setup succeeded: write the interrupt enable mask once more */
+	writel(ENABLE_IRQS, ll->evca + HIDMA_EVCA_IRQ_EN_REG);
+}
+
+/*
+ * The interrupt handler for HIDMA will try to consume as many pending
+ * EVRE from the event queue as possible. Each EVRE has an associated
+ * TRE that holds the user interface parameters. EVRE reports the
+ * result of the transaction. Hardware guarantees ordering between EVREs
+ * and TREs. We use last processed offset to figure out which TRE is
+ * associated with which EVRE. If two TREs are consumed by HW, the EVREs
+ * are in order in the event ring.
+ *
+ * This handler consumes the pending EVREs in one pass. Other EVREs may
+ * be delivered while we are working, so it re-reads the interrupt cause
+ * and keeps consuming until no enabled cause is left.
+ *
+ * For unprocessed EVREs, hardware will trigger another interrupt until
+ * all the interrupt bits are cleared.
+ *
+ * Hardware guarantees that by the time interrupt is observed, all data
+ * transactions in flight are delivered to their respective places and
+ * are visible to the CPU.
+ *
+ * On demand paging for IOMMU is only supported for PCIe via PRI
+ * (Page Request Interface) not for HIDMA. All other hardware instances
+ * including HIDMA work on pinned DMA addresses.
+ *
+ * HIDMA is not aware of IOMMU presence since it follows the DMA API. All
+ * IOMMU latency will be built into the data movement time. By the time
+ * interrupt happens, IOMMU lookups + data movement has already taken place.
+ *
+ * While the first read in a typical PCI endpoint ISR flushes all outstanding
+ * requests traditionally to the destination, this concept does not apply
+ * here for this HW.
+ */
+irqreturn_t hidma_ll_inthandler(int chirq, void *arg)
+{
+	struct hidma_lldev *lldev = arg;
+	u32 status;
+	u32 enable;
+	u32 cause;
+
+	/*
+	 * Fine tuned for this HW...
+	 *
+	 * This ISR has been designed for this particular hardware. Relaxed
+	 * read and write accessors are used for performance reasons due to
+	 * interrupt delivery guarantees. Do not copy this code blindly and
+	 * expect that to work.
+	 */
+	status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+	cause = status & enable;
+
+	while (cause) {
+		if (cause & HIDMA_ERR_INT_MASK) {
+			dev_err(lldev->dev, "error 0x%x, resetting...\n",
+					cause);
+
+			/* Clear out pending interrupts */
+			writel(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+			/* recovery runs later, in hidma_ll_abort() */
+			tasklet_schedule(&lldev->rst_task);
+			goto out;
+		}
+
+		/*
+		 * Try to consume as many EVREs as possible.
+		 */
+		hidma_handle_tre_completion(lldev);
+		/* Acknowledge the causes that were just serviced. */
+		writel_relaxed(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+		/*
+		 * Another interrupt might have arrived while we are
+		 * processing this one. Read the new cause.
+		 */
+		status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+		enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+		cause = status & enable;
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+
+int hidma_ll_enable(struct hidma_lldev *lldev)
+{	/* enable the event channel first, then the transfer channel */
+	u32 val;
+	int ret;
+
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_ENABLE << 16;
+	writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+	ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+				 hidma_is_chan_enabled(HIDMA_CH_STATE(val)),
+				 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "event channel did not get enabled\n");
+		return ret;
+	}
+
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_ENABLE << 16;
+	writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+	ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+				 hidma_is_chan_enabled(HIDMA_CH_STATE(val)),
+				 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "transfer channel did not get enabled\n");
+		return ret;
+	}
+
+	lldev->trch_state = HIDMA_CH_ENABLED;	/* remember the new state */
+	lldev->evch_state = HIDMA_CH_ENABLED;
+
+	return 0;
+}
+
+void hidma_ll_start(struct hidma_lldev *lldev)
+{	/* write the current TRE write offset to the transfer doorbell */
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&lldev->lock, irqflags);
+	writel(lldev->tre_write_offset, lldev->trca + HIDMA_TRCA_DOORBELL_REG);
+	spin_unlock_irqrestore(&lldev->lock, irqflags);
+}
+
+bool hidma_ll_isenabled(struct hidma_lldev *lldev)
+{
+	u32 trca_sts, evca_sts;
+
+	/* refresh the cached channel states from the hardware registers */
+	trca_sts = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	lldev->trch_state = HIDMA_CH_STATE(trca_sts);
+	evca_sts = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	lldev->evch_state = HIDMA_CH_STATE(evca_sts);
+
+	/* report enabled only if the transfer AND the event channel are up */
+	if (!hidma_is_chan_enabled(lldev->trch_state))
+		return false;
+
+	return hidma_is_chan_enabled(lldev->evch_state);
+}
+
+void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	struct hidma_tre *tre;
+	unsigned long flags;
+
+	tre = &lldev->trepool[tre_ch];	/* NOTE(review): tre_ch unchecked */
+
+	/* copy the TRE into its location in the TRE ring */
+	spin_lock_irqsave(&lldev->lock, flags);
+	tre->tre_index = lldev->tre_write_offset / HIDMA_TRE_SIZE;
+	lldev->pending_tre_list[tre->tre_index] = tre;
+	memcpy(lldev->tre_ring + lldev->tre_write_offset,
+			&tre->tre_local[0], HIDMA_TRE_SIZE);
+	tre->err_code = 0;
+	tre->err_info = 0;
+	tre->queued = 1;
+	lldev->pending_tre_count++;
+	lldev->tre_write_offset = (lldev->tre_write_offset + HIDMA_TRE_SIZE)
+					% lldev->tre_ring_size;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+}
+
+/*
+ * Suspend the transfer channel, then the event channel. A transaction that
+ * is already in flight still completes and its callback still runs; the
+ * request only prevents further requests from being made.
+ */
+int hidma_ll_disable(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int ret;
+
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	lldev->evch_state = HIDMA_CH_STATE(val);
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	lldev->trch_state = HIDMA_CH_STATE(val);
+
+	/* already suspended by this OS */
+	if ((lldev->trch_state == HIDMA_CH_SUSPENDED) ||
+	    (lldev->evch_state == HIDMA_CH_SUSPENDED))
+		return 0;
+
+	/* already stopped by the manager */
+	if ((lldev->trch_state == HIDMA_CH_STOPPED) ||
+	    (lldev->evch_state == HIDMA_CH_STOPPED))
+		return 0;
+
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_SUSPEND << 16;
+	writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+	/*
+	 * Wait for the transfer channel to report SUSPENDED:
+	 * poll every 1ms, for at most 10ms.
+	 */
+	ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED,
+				 1000, 10000);
+	if (ret)
+		return ret;
+
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_SUSPEND << 16;
+	writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+	/*
+	 * Same wait for the event channel:
+	 * poll every 1ms, for at most 10ms, until it reports SUSPENDED.
+	 */
+	ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED,
+				 1000, 10000);
+	if (ret)
+		return ret;
+
+	lldev->trch_state = HIDMA_CH_SUSPENDED;
+	lldev->evch_state = HIDMA_CH_SUSPENDED;
+	return 0;
+}
+
+/* Program source, destination, length and flags of an allocated TRE. */
+void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch,
+				  dma_addr_t src, dma_addr_t dest, u32 len,
+				  u32 flags)
+{
+	struct hidma_tre *slot;
+
+	if (tre_ch >= lldev->nr_tres) {
+		dev_err(lldev->dev, "invalid TRE number in transfer params:%d",
+			tre_ch);
+		return;
+	}
+
+	slot = &lldev->trepool[tre_ch];
+	if (atomic_read(&slot->allocated) != 1) {
+		dev_err(lldev->dev, "trying to set params on an unused TRE:%d",
+			tre_ch);
+		return;
+	}
+
+	/* only the cached copy of the TRE is written; the ring is untouched */
+	slot->int_flags = flags;
+	slot->tre_local[HIDMA_TRE_LEN_IDX] = len;
+	slot->tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src);
+	slot->tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src);
+	slot->tre_local[HIDMA_TRE_DEST_LOW_IDX] = lower_32_bits(dest);
+	slot->tre_local[HIDMA_TRE_DEST_HI_IDX] = upper_32_bits(dest);
+}
+
+/*
+ * Called during initialization and after an error condition to restore
+ * hardware state: reset, reprogram both rings, then enable the channels.
+ */
+int hidma_ll_setup(struct hidma_lldev *lldev)
+{
+	int rc;
+	u64 addr;
+	u32 val;
+	u32 nr_tres = lldev->nr_tres;
+
+	lldev->pending_tre_count = 0;	/* software ring state starts over */
+	lldev->tre_processed_off = 0;
+	lldev->evre_processed_off = 0;
+	lldev->tre_write_offset = 0;
+
+	/* disable interrupts */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+	/* clear all pending interrupts */
+	val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+	rc = hidma_ll_reset(lldev);
+	if (rc)
+		return rc;
+
+	/*
+	 * Clear all pending interrupts again.
+	 * Otherwise, we observe reset complete interrupts.
+	 */
+	val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+	/* disable interrupts again after reset */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+	addr = lldev->tre_dma;	/* TRE ring base and length */
+	writel(lower_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_LOW_REG);
+	writel(upper_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_HIGH_REG);
+	writel(lldev->tre_ring_size, lldev->trca + HIDMA_TRCA_RING_LEN_REG);
+
+	addr = lldev->evre_dma;	/* EVRE ring base and length */
+	writel(lower_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_LOW_REG);
+	writel(upper_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_HIGH_REG);
+	writel(HIDMA_EVRE_SIZE * nr_tres,
+			lldev->evca + HIDMA_EVCA_RING_LEN_REG);
+
+	/* support IRQ only for now */
+	val = readl(lldev->evca + HIDMA_EVCA_INTCTRL_REG);
+	val &= ~0xF;
+	val |= 0x1;
+	writel(val, lldev->evca + HIDMA_EVCA_INTCTRL_REG);
+
+	/* clear all pending interrupts and enable them */
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+	return hidma_ll_enable(lldev);
+}
+
+/*
+ * hidma_ll_init: allocate and bring up the low-level state of one channel.
+ * Returns the new lldev, or NULL on invalid arguments or any failure.
+ */
+struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres,
+				  void __iomem *trca, void __iomem *evca,
+				  u8 chidx)
+{
+	struct hidma_lldev *lldev;
+	size_t sz;
+
+	/* need both register windows, a device and at least four TREs */
+	if (!trca || !evca || !dev || nr_tres < 4)
+		return NULL;
+
+	/* need an extra space */
+	nr_tres += 1;
+
+	lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL);
+	if (!lldev)
+		return NULL;
+
+	lldev->evca = evca;
+	lldev->trca = trca;
+	lldev->dev = dev;
+	sz = sizeof(struct hidma_tre);
+	lldev->trepool = devm_kcalloc(lldev->dev, nr_tres, sz, GFP_KERNEL);
+	if (!lldev->trepool)
+		return NULL;
+
+	lldev->pending_tre_list = devm_kcalloc(dev, nr_tres,
+					       sizeof(*lldev->pending_tre_list),
+					       GFP_KERNEL);
+	if (!lldev->pending_tre_list)
+		return NULL;
+
+	/* NOTE(review): alignment slack below is only nr_tres bytes - confirm */
+	sz = (HIDMA_TRE_SIZE + 1) * nr_tres;
+	lldev->tre_ring = dmam_alloc_coherent(dev, sz, &lldev->tre_dma,
+					      GFP_KERNEL);
+	if (!lldev->tre_ring)
+		return NULL;
+
+	memset(lldev->tre_ring, 0, (HIDMA_TRE_SIZE + 1) * nr_tres);
+	lldev->tre_ring_size = HIDMA_TRE_SIZE * nr_tres;
+	lldev->nr_tres = nr_tres;
+
+	/* the TRE ring has to be TRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->tre_dma, HIDMA_TRE_SIZE)) {
+		u8 tre_ring_shift;
+
+		tre_ring_shift = lldev->tre_dma % HIDMA_TRE_SIZE;
+		tre_ring_shift = HIDMA_TRE_SIZE - tre_ring_shift;
+		lldev->tre_dma += tre_ring_shift;
+		lldev->tre_ring += tre_ring_shift;
+	}
+
+	sz = (HIDMA_EVRE_SIZE + 1) * nr_tres;
+	lldev->evre_ring = dmam_alloc_coherent(dev, sz, &lldev->evre_dma,
+					       GFP_KERNEL);
+	if (!lldev->evre_ring)
+		return NULL;
+
+	memset(lldev->evre_ring, 0, (HIDMA_EVRE_SIZE + 1) * nr_tres);
+	lldev->evre_ring_size = HIDMA_EVRE_SIZE * nr_tres;
+
+	/* the EVRE ring has to be EVRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->evre_dma, HIDMA_EVRE_SIZE)) {
+		u8 evre_ring_shift;
+
+		evre_ring_shift = lldev->evre_dma % HIDMA_EVRE_SIZE;
+		evre_ring_shift = HIDMA_EVRE_SIZE - evre_ring_shift;
+		lldev->evre_dma += evre_ring_shift;
+		lldev->evre_ring += evre_ring_shift;
+	}
+	lldev->chidx = chidx;
+
+	sz = nr_tres * sizeof(struct hidma_tre *);
+	if (kfifo_alloc(&lldev->handoff_fifo, sz, GFP_KERNEL))
+		return NULL;
+
+	if (hidma_ll_setup(lldev)) {
+		/* the FIFO is not device-managed: release it by hand */
+		kfifo_free(&lldev->handoff_fifo);
+		return NULL;
+	}
+
+	spin_lock_init(&lldev->lock);
+	tasklet_init(&lldev->rst_task, hidma_ll_abort, (unsigned long)lldev);
+	tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev);
+	lldev->initialized = 1;
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+	return lldev;
+}
+
+/* Quiesce the channel and release what hidma_ll_init() acquired. */
+int hidma_ll_uninit(struct hidma_lldev *lldev)
+{
+	int rc;
+	u32 val;
+
+	if (!lldev)
+		return -ENODEV;
+
+	if (!lldev->initialized)
+		return 0;
+
+	lldev->initialized = 0;
+
+	/* the reset tasklet can schedule the completion one: kill it first */
+	tasklet_kill(&lldev->rst_task);
+	tasklet_kill(&lldev->task);
+	memset(lldev->trepool, 0, sizeof(struct hidma_tre) * lldev->nr_tres);
+	lldev->trepool = NULL;
+	lldev->pending_tre_count = 0;
+	lldev->tre_write_offset = 0;
+
+	rc = hidma_ll_reset(lldev);
+
+	/* clear pending interrupts again: the reset itself raises some */
+	val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+	/* the handoff FIFO came from kfifo_alloc(), so free it by hand */
+	kfifo_free(&lldev->handoff_fifo);
+	return rc;
+}
+
+/*
+ * Translate the err_code recorded for TRE slot @tre_ch into a dmaengine
+ * status. A slot with neither bit set is reported as still in progress.
+ */
+enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	unsigned long flags;
+	u8 code;
+
+	/* take a snapshot under the lock, decode it afterwards */
+	spin_lock_irqsave(&lldev->lock, flags);
+	code = lldev->trepool[tre_ch].err_code;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+
+	/* COMPLETE wins over ERROR should both bits be set */
+	if (code & HIDMA_EVRE_STATUS_COMPLETE)
+		return DMA_COMPLETE;
+	if (code & HIDMA_EVRE_STATUS_ERROR)
+		return DMA_ERROR;
+
+	return DMA_IN_PROGRESS;
+}
diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index ef491b8..c0e3653 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -1,7 +1,7 @@
 /*
  * Qualcomm Technologies HIDMA DMA engine Management interface
  *
- * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -17,13 +17,14 @@
 #include <linux/acpi.h>
 #include <linux/of.h>
 #include <linux/property.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/bitops.h>
+#include <linux/dma-mapping.h>
 
 #include "hidma_mgmt.h"
 
@@ -298,5 +299,109 @@
 	},
 };
 
-module_platform_driver(hidma_mgmt_driver);
+#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+static int object_counter;
+
+/*
+ * Register one platform device per available child of @np, built from the
+ * child's "reg" ranges and its first interrupt. Returns 0 or -errno.
+ */
+static int __init hidma_mgmt_of_populate_channels(struct device_node *np)
+{
+	struct platform_device *pdev_parent = of_find_device_by_node(np);
+	struct platform_device_info pdevinfo;
+	struct of_phandle_args out_irq;
+	struct device_node *child;
+	struct resource *res = NULL;	/* kfree()d at out: on every path */
+	const __be32 *cell;
+	int ret = 0, size, i, num;
+	u64 addr, addr_size;
+
+	for_each_available_child_of_node(np, child) {
+		struct resource *res_iter;
+		struct platform_device *new_pdev;
+
+		cell = of_get_property(child, "reg", &size);
+		if (!cell) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		size /= sizeof(*cell);
+		num = size /
+			(of_n_addr_cells(child) + of_n_size_cells(child)) + 1;
+
+		/* one resource per reg tuple, plus one slot for the IRQ */
+		res = kcalloc(num, sizeof(*res), GFP_KERNEL);
+		if (!res) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		/* read each reg value */
+		i = 0;
+		res_iter = res;
+		while (i < size) {
+			addr = of_read_number(&cell[i],
+					      of_n_addr_cells(child));
+			i += of_n_addr_cells(child);
+			addr_size = of_read_number(&cell[i],
+						   of_n_size_cells(child));
+			i += of_n_size_cells(child);
+			res_iter->start = addr;
+			res_iter->end = res_iter->start + addr_size - 1;
+			res_iter->flags = IORESOURCE_MEM;
+			res_iter++;
+		}
+
+		ret = of_irq_parse_one(child, 0, &out_irq);
+		if (ret)
+			goto out;
+
+		res_iter->start = irq_create_of_mapping(&out_irq);
+		res_iter->name = "hidma event irq";
+		res_iter->flags = IORESOURCE_IRQ;
+
+		memset(&pdevinfo, 0, sizeof(pdevinfo));
+		pdevinfo.fwnode = &child->fwnode;
+		pdevinfo.parent = pdev_parent ? &pdev_parent->dev : NULL;
+		pdevinfo.name = child->name;
+		pdevinfo.id = object_counter++;
+		pdevinfo.res = res;
+		pdevinfo.num_res = num;
+		pdevinfo.dma_mask = DMA_BIT_MASK(64);
+		new_pdev = platform_device_register_full(&pdevinfo);
+		if (IS_ERR(new_pdev)) {
+			ret = PTR_ERR(new_pdev);
+			goto out;
+		}
+		of_dma_configure(&new_pdev->dev, child);
+
+		kfree(res);
+		res = NULL;
+	}
+out:
+	of_node_put(child);	/* no-op after a full walk: child is NULL then */
+	kfree(res);
+	return ret;
+}
+#endif
+
+/* Create DT channel devices (if any), then register the management driver. */
+static int __init hidma_mgmt_init(void)
+{
+#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+	struct device_node *child;
+
+	/*
+	 * for_each_matching_node() drops the previous node's reference itself,
+	 * so an explicit of_node_put() here would put each node twice.
+	 */
+	for_each_matching_node(child, hidma_mgmt_match)
+		hidma_mgmt_of_populate_channels(child);
+#endif
+	/* propagate registration failure instead of reporting success */
+	return platform_driver_register(&hidma_mgmt_driver);
+}
+module_init(hidma_mgmt_init);
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
index 2db12e4..5065ca4 100644
--- a/drivers/dma/sun6i-dma.c
+++ b/drivers/dma/sun6i-dma.c
@@ -146,6 +146,8 @@
 	struct dma_slave_config	cfg;
 	struct sun6i_pchan	*phy;
 	u8			port;
+	u8			irq_type;
+	bool			cyclic;
 };
 
 struct sun6i_dma_dev {
@@ -254,6 +256,30 @@
 	return addr_width >> 1;
 }
 
+/* Size left on @pchan: DMA_CHAN_CUR_CNT plus each LLI past the matched one. */
+static size_t sun6i_get_chan_size(struct sun6i_pchan *pchan)
+{
+	dma_addr_t next_phys = readl(pchan->base + DMA_CHAN_LLI_ADDR);
+	size_t left = readl(pchan->base + DMA_CHAN_CUR_CNT);
+	struct sun6i_dma_lli *cur;
+
+	/* Register holds the end-of-list marker: only the counter remains. */
+	if (next_phys == LLI_LAST_ITEM)
+		return left;
+
+	/* Find the LLI whose successor address equals the register value. */
+	cur = pchan->desc->v_lli;
+	while (cur && cur->p_lli_next != next_phys)
+		cur = cur->v_lli_next;
+
+	/* Add the lengths of everything chained after it. */
+	if (cur)
+		for (cur = cur->v_lli_next; cur; cur = cur->v_lli_next)
+			left += cur->len;
+
+	return left;
+}
+
 static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev,
 			       struct sun6i_dma_lli *next,
 			       dma_addr_t next_phy,
@@ -276,45 +302,6 @@
 	return next;
 }
 
-static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli,
-				    dma_addr_t src,
-				    dma_addr_t dst, u32 len,
-				    struct dma_slave_config *config)
-{
-	u8 src_width, dst_width, src_burst, dst_burst;
-
-	if (!config)
-		return -EINVAL;
-
-	src_burst = convert_burst(config->src_maxburst);
-	if (src_burst)
-		return src_burst;
-
-	dst_burst = convert_burst(config->dst_maxburst);
-	if (dst_burst)
-		return dst_burst;
-
-	src_width = convert_buswidth(config->src_addr_width);
-	if (src_width)
-		return src_width;
-
-	dst_width = convert_buswidth(config->dst_addr_width);
-	if (dst_width)
-		return dst_width;
-
-	lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) |
-		DMA_CHAN_CFG_SRC_WIDTH(src_width) |
-		DMA_CHAN_CFG_DST_BURST(dst_burst) |
-		DMA_CHAN_CFG_DST_WIDTH(dst_width);
-
-	lli->src = src;
-	lli->dst = dst;
-	lli->len = len;
-	lli->para = NORMAL_WAIT;
-
-	return 0;
-}
-
 static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan,
 				      struct sun6i_dma_lli *lli)
 {
@@ -381,9 +368,13 @@
 	irq_reg = pchan->idx / DMA_IRQ_CHAN_NR;
 	irq_offset = pchan->idx % DMA_IRQ_CHAN_NR;
 
-	irq_val = readl(sdev->base + DMA_IRQ_EN(irq_offset));
-	irq_val |= DMA_IRQ_QUEUE << (irq_offset * DMA_IRQ_CHAN_WIDTH);
-	writel(irq_val, sdev->base + DMA_IRQ_EN(irq_offset));
+	vchan->irq_type = vchan->cyclic ? DMA_IRQ_PKG : DMA_IRQ_QUEUE;
+
+	irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg));
+	irq_val &= ~((DMA_IRQ_HALF | DMA_IRQ_PKG | DMA_IRQ_QUEUE) <<
+			(irq_offset * DMA_IRQ_CHAN_WIDTH));
+	irq_val |= vchan->irq_type << (irq_offset * DMA_IRQ_CHAN_WIDTH);
+	writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg));
 
 	writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR);
 	writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE);
@@ -479,11 +470,12 @@
 		writel(status, sdev->base + DMA_IRQ_STAT(i));
 
 		for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) {
-			if (status & DMA_IRQ_QUEUE) {
-				pchan = sdev->pchans + j;
-				vchan = pchan->vchan;
-
-				if (vchan) {
+			pchan = sdev->pchans + j;
+			vchan = pchan->vchan;
+			if (vchan && (status & vchan->irq_type)) {
+				if (vchan->cyclic) {
+					vchan_cyclic_callback(&pchan->desc->vd);
+				} else {
 					spin_lock(&vchan->vc.lock);
 					vchan_cookie_complete(&pchan->desc->vd);
 					pchan->done = pchan->desc;
@@ -502,6 +494,55 @@
 	return ret;
 }
 
+static int set_config(struct sun6i_dma_dev *sdev,
+			struct dma_slave_config *sconfig,
+			enum dma_transfer_direction direction,
+			u32 *p_cfg)
+{
+	s8 src_width, dst_width, src_burst, dst_burst;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		src_burst = convert_burst(sconfig->src_maxburst ?
+					sconfig->src_maxburst : 8);
+		src_width = convert_buswidth(sconfig->src_addr_width !=
+						DMA_SLAVE_BUSWIDTH_UNDEFINED ?
+				sconfig->src_addr_width :
+				DMA_SLAVE_BUSWIDTH_4_BYTES);
+		dst_burst = convert_burst(sconfig->dst_maxburst);
+		dst_width = convert_buswidth(sconfig->dst_addr_width);
+		break;
+	case DMA_DEV_TO_MEM:
+		src_burst = convert_burst(sconfig->src_maxburst);
+		src_width = convert_buswidth(sconfig->src_addr_width);
+		dst_burst = convert_burst(sconfig->dst_maxburst ?
+					sconfig->dst_maxburst : 8);
+		dst_width = convert_buswidth(sconfig->dst_addr_width !=
+						DMA_SLAVE_BUSWIDTH_UNDEFINED ?
+				sconfig->dst_addr_width :
+				DMA_SLAVE_BUSWIDTH_4_BYTES);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (src_burst < 0)
+		return src_burst;
+	if (src_width < 0)
+		return src_width;
+	if (dst_burst < 0)
+		return dst_burst;
+	if (dst_width < 0)
+		return dst_width;
+
+	*p_cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) |
+		DMA_CHAN_CFG_SRC_WIDTH(src_width) |
+		DMA_CHAN_CFG_DST_BURST(dst_burst) |
+		DMA_CHAN_CFG_DST_WIDTH(dst_width);
+
+	return 0;
+}
+
 static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy(
 		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		size_t len, unsigned long flags)
@@ -569,13 +610,15 @@
 	struct sun6i_desc *txd;
 	struct scatterlist *sg;
 	dma_addr_t p_lli;
+	u32 lli_cfg;
 	int i, ret;
 
 	if (!sgl)
 		return NULL;
 
-	if (!is_slave_direction(dir)) {
-		dev_err(chan2dev(chan), "Invalid DMA direction\n");
+	ret = set_config(sdev, sconfig, dir, &lli_cfg);
+	if (ret) {
+		dev_err(chan2dev(chan), "Invalid DMA configuration\n");
 		return NULL;
 	}
 
@@ -588,14 +631,14 @@
 		if (!v_lli)
 			goto err_lli_free;
 
-		if (dir == DMA_MEM_TO_DEV) {
-			ret = sun6i_dma_cfg_lli(v_lli, sg_dma_address(sg),
-						sconfig->dst_addr, sg_dma_len(sg),
-						sconfig);
-			if (ret)
-				goto err_cur_lli_free;
+		v_lli->len = sg_dma_len(sg);
+		v_lli->para = NORMAL_WAIT;
 
-			v_lli->cfg |= DMA_CHAN_CFG_DST_IO_MODE |
+		if (dir == DMA_MEM_TO_DEV) {
+			v_lli->src = sg_dma_address(sg);
+			v_lli->dst = sconfig->dst_addr;
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_IO_MODE |
 				DMA_CHAN_CFG_SRC_LINEAR_MODE |
 				DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
 				DMA_CHAN_CFG_DST_DRQ(vchan->port);
@@ -607,13 +650,10 @@
 				sg_dma_len(sg), flags);
 
 		} else {
-			ret = sun6i_dma_cfg_lli(v_lli, sconfig->src_addr,
-						sg_dma_address(sg), sg_dma_len(sg),
-						sconfig);
-			if (ret)
-				goto err_cur_lli_free;
-
-			v_lli->cfg |= DMA_CHAN_CFG_DST_LINEAR_MODE |
+			v_lli->src = sconfig->src_addr;
+			v_lli->dst = sg_dma_address(sg);
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_LINEAR_MODE |
 				DMA_CHAN_CFG_SRC_IO_MODE |
 				DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
 				DMA_CHAN_CFG_SRC_DRQ(vchan->port);
@@ -634,8 +674,78 @@
 
 	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
 
-err_cur_lli_free:
-	dma_pool_free(sdev->pool, v_lli, p_lli);
+err_lli_free:
+	for (prev = txd->v_lli; prev; prev = prev->v_lli_next)
+		dma_pool_free(sdev->pool, prev, virt_to_phys(prev));
+	kfree(txd);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic(
+					struct dma_chan *chan,
+					dma_addr_t buf_addr,
+					size_t buf_len,
+					size_t period_len,
+					enum dma_transfer_direction dir,
+					unsigned long flags)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun6i_dma_lli *v_lli, *prev = NULL;
+	struct sun6i_desc *txd;
+	dma_addr_t p_lli;
+	u32 lli_cfg;
+	unsigned int i, periods = buf_len / period_len;
+	int ret;
+
+	ret = set_config(sdev, sconfig, dir, &lli_cfg);
+	if (ret) {
+		dev_err(chan2dev(chan), "Invalid DMA configuration\n");
+		return NULL;
+	}
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	for (i = 0; i < periods; i++) {
+		v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli);
+		if (!v_lli) {
+			dev_err(sdev->slave.dev, "Failed to alloc lli memory\n");
+			goto err_lli_free;
+		}
+
+		v_lli->len = period_len;
+		v_lli->para = NORMAL_WAIT;
+
+		if (dir == DMA_MEM_TO_DEV) {
+			v_lli->src = buf_addr + period_len * i;
+			v_lli->dst = sconfig->dst_addr;
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_IO_MODE |
+				DMA_CHAN_CFG_SRC_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_DST_DRQ(vchan->port);
+		} else {
+			v_lli->src = sconfig->src_addr;
+			v_lli->dst = buf_addr + period_len * i;
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_IO_MODE |
+				DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_SRC_DRQ(vchan->port);
+		}
+
+		prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd);
+	}
+
+	prev->p_lli_next = txd->p_lli;		/* cyclic list */
+
+	vchan->cyclic = true;
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
 err_lli_free:
 	for (prev = txd->v_lli; prev; prev = prev->v_lli_next)
 		dma_pool_free(sdev->pool, prev, virt_to_phys(prev));
@@ -712,6 +822,16 @@
 
 	spin_lock_irqsave(&vchan->vc.lock, flags);
 
+	if (vchan->cyclic) {
+		vchan->cyclic = false;
+		if (pchan && pchan->desc) {
+			struct virt_dma_desc *vd = &pchan->desc->vd;
+			struct virt_dma_chan *vc = &vchan->vc;
+
+			list_add_tail(&vd->node, &vc->desc_completed);
+		}
+	}
+
 	vchan_get_all_descriptors(&vchan->vc, &head);
 
 	if (pchan) {
@@ -759,7 +879,7 @@
 	} else if (!pchan || !pchan->desc) {
 		bytes = 0;
 	} else {
-		bytes = readl(pchan->base + DMA_CHAN_CUR_CNT);
+		bytes = sun6i_get_chan_size(pchan);
 	}
 
 	spin_unlock_irqrestore(&vchan->vc.lock, flags);
@@ -963,6 +1083,7 @@
 	dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask);
 	dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask);
 	dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, sdc->slave.cap_mask);
 
 	INIT_LIST_HEAD(&sdc->slave.channels);
 	sdc->slave.device_free_chan_resources	= sun6i_dma_free_chan_resources;
@@ -970,6 +1091,7 @@
 	sdc->slave.device_issue_pending		= sun6i_dma_issue_pending;
 	sdc->slave.device_prep_slave_sg		= sun6i_dma_prep_slave_sg;
 	sdc->slave.device_prep_dma_memcpy	= sun6i_dma_prep_dma_memcpy;
+	sdc->slave.device_prep_dma_cyclic	= sun6i_dma_prep_dma_cyclic;
 	sdc->slave.copy_align			= DMAENGINE_ALIGN_4_BYTES;
 	sdc->slave.device_config		= sun6i_dma_config;
 	sdc->slave.device_pause			= sun6i_dma_pause;
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 3871f29..01e316f 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -54,6 +54,7 @@
 #define TEGRA_APBDMA_CSR_ONCE			BIT(27)
 #define TEGRA_APBDMA_CSR_FLOW			BIT(21)
 #define TEGRA_APBDMA_CSR_REQ_SEL_SHIFT		16
+#define TEGRA_APBDMA_CSR_REQ_SEL_MASK		0x1F
 #define TEGRA_APBDMA_CSR_WCOUNT_MASK		0xFFFC
 
 /* STATUS register */
@@ -114,6 +115,8 @@
 /* Channel base address offset from APBDMA base address */
 #define TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET	0x1000
 
+#define TEGRA_APBDMA_SLAVE_ID_INVALID	(TEGRA_APBDMA_CSR_REQ_SEL_MASK + 1)
+
 struct tegra_dma;
 
 /*
@@ -353,8 +356,11 @@
 	}
 
 	memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig));
-	if (!tdc->slave_id)
+	if (tdc->slave_id == TEGRA_APBDMA_SLAVE_ID_INVALID) {
+		if (sconfig->slave_id > TEGRA_APBDMA_CSR_REQ_SEL_MASK)
+			return -EINVAL;
 		tdc->slave_id = sconfig->slave_id;
+	}
 	tdc->config_init = true;
 	return 0;
 }
@@ -1236,7 +1242,7 @@
 	}
 	pm_runtime_put(tdma->dev);
 
-	tdc->slave_id = 0;
+	tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
 }
 
 static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
@@ -1246,6 +1252,11 @@
 	struct dma_chan *chan;
 	struct tegra_dma_channel *tdc;
 
+	if (dma_spec->args[0] > TEGRA_APBDMA_CSR_REQ_SEL_MASK) {
+		dev_err(tdma->dev, "Invalid slave id: %d\n", dma_spec->args[0]);
+		return NULL;
+	}
+
 	chan = dma_get_any_slave_channel(&tdma->dma_dev);
 	if (!chan)
 		return NULL;
@@ -1389,6 +1400,7 @@
 				&tdma->dma_dev.channels);
 		tdc->tdma = tdma;
 		tdc->id = i;
+		tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
 
 		tasklet_init(&tdc->tasklet, tegra_dma_tasklet,
 				(unsigned long)tdc);
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
new file mode 100644
index 0000000..c4b121c
--- /dev/null
+++ b/drivers/dma/tegra210-adma.c
@@ -0,0 +1,840 @@
+/*
+ * ADMA driver for Nvidia's Tegra210 ADMA controller.
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define ADMA_CH_CMD					0x00
+#define ADMA_CH_STATUS					0x0c
+#define ADMA_CH_STATUS_XFER_EN				BIT(0)
+
+#define ADMA_CH_INT_STATUS				0x10
+#define ADMA_CH_INT_STATUS_XFER_DONE			BIT(0)
+
+#define ADMA_CH_INT_CLEAR				0x1c
+#define ADMA_CH_CTRL					0x24
+#define ADMA_CH_CTRL_TX_REQ(val)			(((val) & 0xf) << 28)
+#define ADMA_CH_CTRL_TX_REQ_MAX				10
+#define ADMA_CH_CTRL_RX_REQ(val)			(((val) & 0xf) << 24)
+#define ADMA_CH_CTRL_RX_REQ_MAX				10
+#define ADMA_CH_CTRL_DIR(val)				(((val) & 0xf) << 12)
+#define ADMA_CH_CTRL_DIR_AHUB2MEM			2
+#define ADMA_CH_CTRL_DIR_MEM2AHUB			4
+#define ADMA_CH_CTRL_MODE_CONTINUOUS			(2 << 8)
+#define ADMA_CH_CTRL_FLOWCTRL_EN			BIT(1)
+
+#define ADMA_CH_CONFIG					0x28
+#define ADMA_CH_CONFIG_SRC_BUF(val)			(((val) & 0x7) << 28)
+#define ADMA_CH_CONFIG_TRG_BUF(val)			(((val) & 0x7) << 24)
+#define ADMA_CH_CONFIG_BURST_SIZE(val)			(((val) & 0x7) << 20)
+#define ADMA_CH_CONFIG_BURST_16				5
+#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val)		((val) & 0xf)
+#define ADMA_CH_CONFIG_MAX_BUFS				8
+
+#define ADMA_CH_FIFO_CTRL				0x2c
+#define ADMA_CH_FIFO_CTRL_OVRFW_THRES(val)		(((val) & 0xf) << 24)
+#define ADMA_CH_FIFO_CTRL_STARV_THRES(val)		(((val) & 0xf) << 16)
+#define ADMA_CH_FIFO_CTRL_TX_SIZE(val)			(((val) & 0xf) << 8)
+#define ADMA_CH_FIFO_CTRL_RX_SIZE(val)			((val) & 0xf)
+
+#define ADMA_CH_LOWER_SRC_ADDR				0x34
+#define ADMA_CH_LOWER_TRG_ADDR				0x3c
+#define ADMA_CH_TC					0x44
+#define ADMA_CH_TC_COUNT_MASK				0x3ffffffc
+
+#define ADMA_CH_XFER_STATUS				0x54
+#define ADMA_CH_XFER_STATUS_COUNT_MASK			0xffff
+
+#define ADMA_GLOBAL_CMD					0xc00
+#define ADMA_GLOBAL_SOFT_RESET				0xc04
+#define ADMA_GLOBAL_INT_CLEAR				0xc20
+#define ADMA_GLOBAL_CTRL				0xc24
+
+#define ADMA_CH_REG_OFFSET(a)				((a) * 0x80) /* arg parenthesised so expressions expand safely */
+
+#define ADMA_CH_FIFO_CTRL_DEFAULT	(ADMA_CH_FIFO_CTRL_OVRFW_THRES(1) | \
+					 ADMA_CH_FIFO_CTRL_STARV_THRES(1) | \
+					 ADMA_CH_FIFO_CTRL_TX_SIZE(3)     | \
+					 ADMA_CH_FIFO_CTRL_RX_SIZE(3))
+struct tegra_adma;
+
+/*
+ * struct tegra_adma_chip_data - Tegra chip specific data
+ * @nr_channels: Number of DMA channels available.
+ */
+struct tegra_adma_chip_data {
+	int nr_channels;
+};
+
+/*
+ * struct tegra_adma_chan_regs - Tegra ADMA channel registers
+ */
+struct tegra_adma_chan_regs {
+	unsigned int ctrl;		/* value for ADMA_CH_CTRL */
+	unsigned int config;		/* value for ADMA_CH_CONFIG */
+	unsigned int src_addr;		/* value for ADMA_CH_LOWER_SRC_ADDR */
+	unsigned int trg_addr;		/* value for ADMA_CH_LOWER_TRG_ADDR */
+	unsigned int fifo_ctrl;		/* value for ADMA_CH_FIFO_CTRL */
+	unsigned int tc;		/* value for ADMA_CH_TC */
+};
+
+/*
+ * struct tegra_adma_desc - Tegra ADMA descriptor to manage transfer requests.
+ */
+struct tegra_adma_desc {
+	struct virt_dma_desc		vd;
+	struct tegra_adma_chan_regs	ch_regs;	/* written out by tegra_adma_start() */
+	size_t				buf_len;	/* whole cyclic buffer */
+	size_t				period_len;	/* one period; masked into ch_regs.tc */
+	size_t				num_periods;	/* buf_len / period_len */
+};
+
+/*
+ * struct tegra_adma_chan - Tegra ADMA channel information
+ */
+struct tegra_adma_chan {
+	struct virt_dma_chan		vc;
+	struct tegra_adma_desc		*desc;	/* set by start, NULL after stop */
+	struct tegra_adma		*tdma;	/* owning controller */
+	int				irq;	/* requested in alloc_chan_resources */
+	void __iomem			*chan_addr;	/* base for tdma_ch_read/write */
+
+	/* Slave channel configuration info */
+	struct dma_slave_config		sconfig;
+	enum dma_transfer_direction	sreq_dir;
+	unsigned int			sreq_index;	/* from the DT dma specifier */
+	bool				sreq_reserved;	/* request bit is held */
+
+	/* Transfer count and position info */
+	unsigned int			tx_buf_count;	/* accumulated XFER_STATUS deltas */
+	unsigned int			tx_buf_pos;	/* last XFER_STATUS value read */
+};
+
+/*
+ * struct tegra_adma - Tegra ADMA controller information
+ */
+struct tegra_adma {
+	struct dma_device		dma_dev;
+	struct device			*dev;
+	void __iomem			*base_addr;	/* base for tdma_read/write */
+	unsigned int			nr_channels;
+	unsigned long			rx_requests_reserved;	/* bitmap by index */
+	unsigned long			tx_requests_reserved;	/* bitmap by index */
+
+	/* Used to store global command register state when suspending */
+	unsigned int			global_cmd;
+
+	/* Flexible array member; must remain the last member */
+	struct tegra_adma_chan		channels[];
+};
+
+static inline void tdma_write(struct tegra_adma *tdma, u32 reg, u32 val)
+{
+	writel(val, tdma->base_addr + reg);	/* @reg: offset from base_addr */
+}
+
+static inline u32 tdma_read(struct tegra_adma *tdma, u32 reg)
+{
+	return readl(tdma->base_addr + reg);	/* @reg: offset from base_addr */
+}
+
+static inline void tdma_ch_write(struct tegra_adma_chan *tdc, u32 reg, u32 val)
+{
+	writel(val, tdc->chan_addr + reg);	/* @reg: offset from chan_addr */
+}
+
+static inline u32 tdma_ch_read(struct tegra_adma_chan *tdc, u32 reg)
+{
+	return readl(tdc->chan_addr + reg);	/* @reg: offset from chan_addr */
+}
+
+static inline struct tegra_adma_chan *to_tegra_adma_chan(struct dma_chan *dc)
+{
+	return container_of(dc, struct tegra_adma_chan, vc.chan);	/* dc is the embedded vc.chan */
+}
+
+static inline struct tegra_adma_desc *to_tegra_adma_desc(
+		struct dma_async_tx_descriptor *td)
+{
+	return container_of(td, struct tegra_adma_desc, vd.tx);	/* td is the embedded vd.tx */
+}
+
+static inline struct device *tdc2dev(struct tegra_adma_chan *tdc)
+{
+	return tdc->tdma->dev;	/* the owning controller's device */
+}
+
+static void tegra_adma_desc_free(struct virt_dma_desc *vd)
+{
+	kfree(container_of(vd, struct tegra_adma_desc, vd));	/* frees the enclosing descriptor */
+}
+
+static int tegra_adma_slave_config(struct dma_chan *dc,
+				   struct dma_slave_config *sconfig)
+{
+	struct tegra_adma_chan *chan = to_tegra_adma_chan(dc);
+
+	/* Cache the whole config; it is consulted when a transfer is prepared. */
+	chan->sconfig = *sconfig;
+	return 0;
+}
+
+static int tegra_adma_init(struct tegra_adma *tdma)
+{
+	u32 status;
+	int ret;
+	/* Soft-reset the controller, then enable it; 0 or the poll's error. */
+	/* Clear any interrupts */
+	tdma_write(tdma, ADMA_GLOBAL_INT_CLEAR, 0x1);
+
+	/* Assert soft reset */
+	tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1);
+
+	/* Wait for reset to clear: poll every 20us, give up after 10ms */
+	ret = readx_poll_timeout(readl,
+				 tdma->base_addr + ADMA_GLOBAL_SOFT_RESET,
+				 status, status == 0, 20, 10000);
+	if (ret)
+		return ret;	/* the reset register never read back as 0 */
+
+	/* Enable global ADMA registers */
+	tdma_write(tdma, ADMA_GLOBAL_CMD, 1);
+
+	return 0;
+}
+
+/* Reserve tdc->sreq_index for @direction. Returns 0 or -EINVAL. */
+static int tegra_adma_request_alloc(struct tegra_adma_chan *tdc,
+				    enum dma_transfer_direction direction)
+{
+	struct tegra_adma *tdma = tdc->tdma;
+	unsigned int sreq_index = tdc->sreq_index;
+	unsigned long *reserved;
+	unsigned int max_index;
+
+	/* Already holding a request: only the same direction may reuse it. */
+	if (tdc->sreq_reserved)
+		return tdc->sreq_dir == direction ? 0 : -EINVAL;
+
+	/* Select the per-direction bitmap and the largest valid index. */
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		reserved = &tdma->tx_requests_reserved;
+		max_index = ADMA_CH_CTRL_TX_REQ_MAX;
+		break;
+	case DMA_DEV_TO_MEM:
+		reserved = &tdma->rx_requests_reserved;
+		max_index = ADMA_CH_CTRL_RX_REQ_MAX;
+		break;
+	default:
+		dev_WARN(tdma->dev, "channel %s has invalid transfer type\n",
+			 dma_chan_name(&tdc->vc.chan));
+		return -EINVAL;
+	}
+
+	if (sreq_index > max_index) {
+		dev_err(tdma->dev, "invalid DMA request\n");
+		return -EINVAL;
+	}
+
+	/* Atomically claim the bit; fail if it was already set. */
+	if (test_and_set_bit(sreq_index, reserved)) {
+		dev_err(tdma->dev, "DMA request reserved\n");
+		return -EINVAL;
+	}
+
+	tdc->sreq_dir = direction;
+	tdc->sreq_reserved = true;
+
+	return 0;
+}
+
+/* Release the request bit held by @tdc, if any. */
+static void tegra_adma_request_free(struct tegra_adma_chan *tdc)
+{
+	struct tegra_adma *tdma = tdc->tdma;
+	unsigned long *reserved;
+
+	if (!tdc->sreq_reserved)
+		return;
+
+	if (tdc->sreq_dir == DMA_MEM_TO_DEV) {
+		reserved = &tdma->tx_requests_reserved;
+	} else if (tdc->sreq_dir == DMA_DEV_TO_MEM) {
+		reserved = &tdma->rx_requests_reserved;
+	} else {
+		/* Unknown direction: warn and leave sreq_reserved set. */
+		dev_WARN(tdma->dev, "channel %s has invalid transfer type\n",
+			 dma_chan_name(&tdc->vc.chan));
+		return;
+	}
+
+	clear_bit(tdc->sreq_index, reserved);
+
+	tdc->sreq_reserved = false;
+}
+
+static u32 tegra_adma_irq_status(struct tegra_adma_chan *tdc)
+{
+	/* Report only the XFER_DONE bit of the channel interrupt status. */
+	return tdma_ch_read(tdc, ADMA_CH_INT_STATUS) &
+	       ADMA_CH_INT_STATUS_XFER_DONE;
+}
+
+static u32 tegra_adma_irq_clear(struct tegra_adma_chan *tdc)
+{
+	const u32 pending = tegra_adma_irq_status(tdc);
+
+	/* Write back exactly the bits seen pending; nothing to do if none. */
+	if (pending != 0)
+		tdma_ch_write(tdc, ADMA_CH_INT_CLEAR, pending);
+	return pending;
+}
+
+static void tegra_adma_stop(struct tegra_adma_chan *tdc)
+{
+	unsigned int status;
+
+	/* Disable ADMA */
+	tdma_ch_write(tdc, ADMA_CH_CMD, 0);
+
+	/* Clear interrupt status */
+	tegra_adma_irq_clear(tdc);
+	/* Poll (20us steps, 10ms limit) until XFER_EN reads back clear. */
+	if (readx_poll_timeout_atomic(readl, tdc->chan_addr + ADMA_CH_STATUS,
+			status, !(status & ADMA_CH_STATUS_XFER_EN),
+			20, 10000)) {
+		dev_err(tdc2dev(tdc), "unable to stop DMA channel\n");
+		return;	/* tdc->desc is left in place on timeout */
+	}
+	/* XFER_EN is clear: free the descriptor that was running. */
+	kfree(tdc->desc);
+	tdc->desc = NULL;
+}
+
+/*
+ * Program the channel from the next descriptor on the vchan and start it.
+ * Does nothing when vchan_next_desc() finds no descriptor.
+ */
+static void tegra_adma_start(struct tegra_adma_chan *tdc)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&tdc->vc);
+	struct tegra_adma_chan_regs *ch_regs;
+	struct tegra_adma_desc *desc;
+
+	if (!vd)
+		return;
+
+	list_del(&vd->node);
+
+	/* container_of() on a valid vd cannot yield NULL: no check needed */
+	desc = to_tegra_adma_desc(&vd->tx);
+	ch_regs = &desc->ch_regs;
+
+	/* Reset the bookkeeping consumed by tegra_adma_get_residue(). */
+	tdc->tx_buf_pos = 0;
+	tdc->tx_buf_count = 0;
+	tdma_ch_write(tdc, ADMA_CH_TC, ch_regs->tc);
+	tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl);
+	tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_regs->src_addr);
+	tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_regs->trg_addr);
+	tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl);
+	tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_regs->config);
+
+	/* Start ADMA */
+	tdma_ch_write(tdc, ADMA_CH_CMD, 1);
+
+	tdc->desc = desc;
+}
+
+static unsigned int tegra_adma_get_residue(struct tegra_adma_chan *tdc)
+{
+	struct tegra_adma_desc *desc = tdc->desc;
+	unsigned int max = ADMA_CH_XFER_STATUS_COUNT_MASK + 1;
+	unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS);
+	unsigned int periods_remaining;
+
+	/*
+	 * Add the counter delta since the last call, allowing for wrap at max.
+	 */
+	if (pos < tdc->tx_buf_pos)
+		tdc->tx_buf_count += pos + (max - tdc->tx_buf_pos);
+	else
+		tdc->tx_buf_count += pos - tdc->tx_buf_pos;
+
+	periods_remaining = tdc->tx_buf_count % desc->num_periods;
+	tdc->tx_buf_pos = pos;
+	/* Result: buf_len minus (count mod num_periods) whole periods. */
+	return desc->buf_len - (periods_remaining * desc->period_len);
+}
+
+static irqreturn_t tegra_adma_isr(int irq, void *dev_id)
+{
+	struct tegra_adma_chan *tdc = dev_id;
+	irqreturn_t handled = IRQ_NONE;
+	unsigned long status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+
+	/* Ack first; run the cyclic callback only if pending on a live desc. */
+	status = tegra_adma_irq_clear(tdc);
+	if (status != 0 && tdc->desc) {
+		vchan_cyclic_callback(&tdc->desc->vd);
+		handled = IRQ_HANDLED;
+	}
+
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+	return handled;
+}
+
+/* Issue submitted descriptors and start the channel if it is idle. */
+static void tegra_adma_issue_pending(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+
+	/* Start only when something was issued and no descriptor is active. */
+	if (vchan_issue_pending(&tdc->vc) && !tdc->desc)
+		tegra_adma_start(tdc);
+
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+}
+
+static int tegra_adma_terminate_all(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+	/* Halt the active transfer, if there is one. */
+	if (tdc->desc)
+		tegra_adma_stop(tdc);
+	/* Drop the request reservation and collect every queued descriptor. */
+	tegra_adma_request_free(tdc);
+	vchan_get_all_descriptors(&tdc->vc, &head);
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+	vchan_dma_desc_free_list(&tdc->vc, &head);	/* freed outside the lock */
+
+	return 0;
+}
+
+/* Report cookie status and, when @txstate is given, the residue. */
+static enum dma_status tegra_adma_tx_status(struct dma_chan *dc,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	unsigned int residual = 0;
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+
+	/* Finished cookies, or callers passing no state, are answered here. */
+	ret = dma_cookie_status(dc, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+
+	vd = vchan_find_desc(&tdc->vc, cookie);
+	if (vd) {
+		/* Not started yet: report the descriptor's programmed count. */
+		residual = to_tegra_adma_desc(&vd->tx)->ch_regs.tc;
+	} else if (tdc->desc && tdc->desc->vd.tx.cookie == cookie) {
+		/* Active descriptor: derive the residue from the hardware. */
+		residual = tegra_adma_get_residue(tdc);
+	}
+
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+	dma_set_residue(txstate, residual);
+
+	return ret;
+}
+
+static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc,
+				      struct tegra_adma_desc *desc,
+				      dma_addr_t buf_addr,
+				      enum dma_transfer_direction direction)
+{
+	struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs;
+	unsigned int burst_size, adma_dir;
+	/* Fill desc->ch_regs for @direction, then reserve the request line. */
+	if (desc->num_periods > ADMA_CH_CONFIG_MAX_BUFS)
+		return -EINVAL;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		adma_dir = ADMA_CH_CTRL_DIR_MEM2AHUB;
+		burst_size = fls(tdc->sconfig.dst_maxburst);
+		ch_regs->config = ADMA_CH_CONFIG_SRC_BUF(desc->num_periods - 1);
+		ch_regs->ctrl = ADMA_CH_CTRL_TX_REQ(tdc->sreq_index);
+		ch_regs->src_addr = buf_addr;
+		break;
+
+	case DMA_DEV_TO_MEM:
+		adma_dir = ADMA_CH_CTRL_DIR_AHUB2MEM;
+		burst_size = fls(tdc->sconfig.src_maxburst);
+		ch_regs->config = ADMA_CH_CONFIG_TRG_BUF(desc->num_periods - 1);
+		ch_regs->ctrl = ADMA_CH_CTRL_RX_REQ(tdc->sreq_index);
+		ch_regs->trg_addr = buf_addr;
+		break;
+
+	default:
+		dev_err(tdc2dev(tdc), "DMA direction is not supported\n");
+		return -EINVAL;
+	}
+	/* A zero or too-large result falls back to ADMA_CH_CONFIG_BURST_16. */
+	if (!burst_size || burst_size > ADMA_CH_CONFIG_BURST_16)
+		burst_size = ADMA_CH_CONFIG_BURST_16;
+
+	ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir) |
+			 ADMA_CH_CTRL_MODE_CONTINUOUS |
+			 ADMA_CH_CTRL_FLOWCTRL_EN;
+	ch_regs->config |= ADMA_CH_CONFIG_BURST_SIZE(burst_size);
+	ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1);
+	ch_regs->fifo_ctrl = ADMA_CH_FIFO_CTRL_DEFAULT;
+	ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK;
+
+	return tegra_adma_request_alloc(tdc, direction);
+}
+
+/* Build a cyclic descriptor for @buf_addr; NULL if validation/alloc fails. */
+static struct dma_async_tx_descriptor *tegra_adma_prep_dma_cyclic(
+	struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	struct device *dev = tdc2dev(tdc);
+	struct tegra_adma_desc *desc;
+
+	if (!buf_len || !period_len || period_len > ADMA_CH_TC_COUNT_MASK) {
+		dev_err(dev, "invalid buffer/period len\n");
+		return NULL;
+	}
+
+	if (buf_len % period_len != 0) {
+		dev_err(dev, "buf_len not a multiple of period_len\n");
+		return NULL;
+	}
+
+	if (!IS_ALIGNED(buf_addr, 4)) {
+		dev_err(dev, "invalid buffer alignment\n");
+		return NULL;
+	}
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->buf_len = buf_len;
+	desc->period_len = period_len;
+	desc->num_periods = buf_len / period_len;
+	if (tegra_adma_set_xfer_params(tdc, desc, buf_addr, direction)) {
+		kfree(desc);
+		return NULL;
+	}
+	return vchan_tx_prep(&tdc->vc, &desc->vd, flags);
+}
+
+/* Claim the channel IRQ and a runtime-PM reference; 0 or negative errno. */
+static int tegra_adma_alloc_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	int ret;
+
+	ret = request_irq(tdc->irq, tegra_adma_isr, 0, dma_chan_name(dc), tdc);
+	if (ret) {
+		dev_err(tdc2dev(tdc), "failed to get interrupt for %s\n",
+			dma_chan_name(dc));
+		return ret;
+	}
+	ret = pm_runtime_get_sync(tdc2dev(tdc));
+	if (ret < 0) {
+		/* get_sync raised the usage count even though it failed */
+		pm_runtime_put_noidle(tdc2dev(tdc));
+		free_irq(tdc->irq, tdc);
+		return ret;
+	}
+	dma_cookie_init(&tdc->vc.chan);
+	return 0;
+}
+
+static void tegra_adma_free_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	/* Stop and drain the channel before its IRQ and PM ref are released. */
+	tegra_adma_terminate_all(dc);
+	vchan_free_chan_resources(&tdc->vc);
+	tasklet_kill(&tdc->vc.task);
+	free_irq(tdc->irq, tdc);
+	pm_runtime_put(tdc2dev(tdc));
+	/* Clear the request index and direction stored on the channel. */
+	tdc->sreq_index = 0;
+	tdc->sreq_dir = DMA_TRANS_NONE;
+}
+
+/* Translate a one-cell DT dma specifier into a channel; NULL on failure. */
+static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct tegra_adma *tdma = ofdma->of_dma_data;
+	struct tegra_adma_chan *tdc;
+	struct dma_chan *chan;
+	unsigned int sreq_index;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	/* The single cell is the request index; index 0 is refused. */
+	sreq_index = dma_spec->args[0];
+	if (!sreq_index) {
+		dev_err(tdma->dev, "DMA request must not be 0\n");
+		return NULL;
+	}
+
+	chan = dma_get_any_slave_channel(&tdma->dma_dev);
+	if (chan) {
+		tdc = to_tegra_adma_chan(chan);
+		tdc->sreq_index = sreq_index;
+	}
+
+	return chan;
+}
+
+static int tegra_adma_runtime_suspend(struct device *dev)
+{
+	struct tegra_adma *tdma = dev_get_drvdata(dev);
+
+	tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD);
+	/* ADMA_GLOBAL_CMD is saved above; runtime resume writes it back */
+	return pm_clk_suspend(dev);
+}
+
+static int tegra_adma_runtime_resume(struct device *dev)
+{
+	struct tegra_adma *tdma = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_clk_resume(dev);
+	if (ret)
+		return ret;
+	/* Write back the ADMA_GLOBAL_CMD value saved at runtime suspend */
+	tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd);
+
+	return 0;
+}
+
+static const struct tegra_adma_chip_data tegra210_chip_data = {
+	.nr_channels = 22,
+};
+/* .data is picked up in probe through of_device_get_match_data() */
+static const struct of_device_id tegra_adma_of_match[] = {
+	{ .compatible = "nvidia,tegra210-adma", .data = &tegra210_chip_data },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_adma_of_match);
+
+static int tegra_adma_probe(struct platform_device *pdev)
+{
+	const struct tegra_adma_chip_data *cdata;
+	struct tegra_adma *tdma;
+	struct resource	*res;
+	struct clk *clk;
+	int ret, i;
+
+	cdata = of_device_get_match_data(&pdev->dev);
+	if (!cdata) {
+		dev_err(&pdev->dev, "device match data not found\n");
+		return -ENODEV;
+	}
+
+	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
+			    sizeof(struct tegra_adma_chan), GFP_KERNEL);
+	if (!tdma)
+		return -ENOMEM;
+
+	tdma->dev = &pdev->dev;
+	tdma->nr_channels = cdata->nr_channels;
+	platform_set_drvdata(pdev, tdma);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tdma->base_addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(tdma->base_addr))
+		return PTR_ERR(tdma->base_addr);
+
+	ret = pm_clk_create(&pdev->dev);
+	if (ret)
+		return ret;
+
+	clk = clk_get(&pdev->dev, "d_audio");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "ADMA clock not found\n");
+		ret = PTR_ERR(clk);
+		goto clk_destroy;
+	}
+
+	ret = pm_clk_add_clk(&pdev->dev, clk);
+	if (ret) {
+		clk_put(clk);
+		goto clk_destroy;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+
+	/* A failed get_sync still holds a usage count: unwind via rpm_put */
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0)
+		goto rpm_put;
+
+	ret = tegra_adma_init(tdma);
+	if (ret)
+		goto rpm_put;
+
+	INIT_LIST_HEAD(&tdma->dma_dev.channels);
+	for (i = 0; i < tdma->nr_channels; i++) {
+		struct tegra_adma_chan *tdc = &tdma->channels[i];
+
+		tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i);
+		/* of_irq_get() returns 0 when no mapping could be created */
+		tdc->irq = of_irq_get(pdev->dev.of_node, i);
+		if (tdc->irq <= 0) {
+			ret = tdc->irq ?: -ENXIO;
+			goto irq_dispose;
+		}
+
+		vchan_init(&tdc->vc, &tdma->dma_dev);
+		tdc->vc.desc_free = tegra_adma_desc_free;
+		tdc->tdma = tdma;
+	}
+
+	dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask);
+
+	tdma->dma_dev.dev = &pdev->dev;
+	tdma->dma_dev.device_alloc_chan_resources =
+					tegra_adma_alloc_chan_resources;
+	tdma->dma_dev.device_free_chan_resources =
+					tegra_adma_free_chan_resources;
+	tdma->dma_dev.device_issue_pending = tegra_adma_issue_pending;
+	tdma->dma_dev.device_prep_dma_cyclic = tegra_adma_prep_dma_cyclic;
+	tdma->dma_dev.device_config = tegra_adma_slave_config;
+	tdma->dma_dev.device_tx_status = tegra_adma_tx_status;
+	tdma->dma_dev.device_terminate_all = tegra_adma_terminate_all;
+	tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+
+	ret = dma_async_device_register(&tdma->dma_dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "ADMA registration failed: %d\n", ret);
+		goto irq_dispose;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 tegra_dma_of_xlate, tdma);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "ADMA OF registration failed %d\n", ret);
+		goto dma_remove;
+	}
+	/* Drop the probe reference; alloc_chan_resources() takes its own */
+	pm_runtime_put(&pdev->dev);
+
+	dev_info(&pdev->dev, "Tegra210 ADMA driver registered %d channels\n",
+		 tdma->nr_channels);
+
+	return 0;
+
+dma_remove:
+	dma_async_device_unregister(&tdma->dma_dev);
+irq_dispose:
+	while (--i >= 0)
+		irq_dispose_mapping(tdma->channels[i].irq);
+rpm_put:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+clk_destroy:
+	pm_clk_destroy(&pdev->dev);
+
+	return ret;
+}
+
+static int tegra_adma_remove(struct platform_device *pdev)
+{
+	struct tegra_adma *tdma = platform_get_drvdata(pdev);
+	int i;
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&tdma->dma_dev);
+
+	for (i = 0; i < tdma->nr_channels; ++i)
+		irq_dispose_mapping(tdma->channels[i].irq);
+	/* probe() already dropped its runtime PM reference: only disable */
+	pm_runtime_disable(&pdev->dev);
+	pm_clk_destroy(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP /* system sleep is refused while runtime-active */
+static int tegra_adma_pm_suspend(struct device *dev)
+{
+	return pm_runtime_suspended(dev) ? 0 : -EBUSY;
+}
+#endif
+
+static const struct dev_pm_ops tegra_adma_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_adma_runtime_suspend,
+			   tegra_adma_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_adma_pm_suspend, NULL)
+};
+/* Platform driver: OF-matched, with the PM callbacks declared above */
+static struct platform_driver tegra_admac_driver = {
+	.driver = {
+		.name	= "tegra-adma",
+		.pm	= &tegra_adma_dev_pm_ops,
+		.of_match_table = tegra_adma_of_match,
+	},
+	.probe		= tegra_adma_probe,
+	.remove		= tegra_adma_remove,
+};
+
+module_platform_driver(tegra_admac_driver);
+
+MODULE_ALIAS("platform:tegra210-adma");
+MODULE_DESCRIPTION("NVIDIA Tegra ADMA driver");
+MODULE_AUTHOR("Dara Ramesh <dramesh@nvidia.com>");
+MODULE_AUTHOR("Jon Hunter <jonathanh@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
index ef67f27..df91185 100644
--- a/drivers/dma/xilinx/xilinx_vdma.c
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -16,6 +16,15 @@
  * video device (S2MM). Initialization, status, interrupt and management
  * registers are accessed through an AXI4-Lite slave interface.
  *
+ * The AXI Direct Memory Access (AXI DMA) core is a soft Xilinx IP core that
+ * provides high-bandwidth one dimensional direct memory access between memory
+ * and AXI4-Stream target peripherals. It supports one receive and one
+ * transmit channel, both of them optional at synthesis time.
+ *
+ * The AXI CDMA is a soft IP core that provides high-bandwidth Direct Memory
+ * Access (DMA) between a memory-mapped source address and a memory-mapped
+ * destination address.
+ *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
@@ -35,116 +44,138 @@
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
+#include <linux/clk.h>
 
 #include "../dmaengine.h"
 
 /* Register/Descriptor Offsets */
-#define XILINX_VDMA_MM2S_CTRL_OFFSET		0x0000
-#define XILINX_VDMA_S2MM_CTRL_OFFSET		0x0030
+#define XILINX_DMA_MM2S_CTRL_OFFSET		0x0000
+#define XILINX_DMA_S2MM_CTRL_OFFSET		0x0030
 #define XILINX_VDMA_MM2S_DESC_OFFSET		0x0050
 #define XILINX_VDMA_S2MM_DESC_OFFSET		0x00a0
 
 /* Control Registers */
-#define XILINX_VDMA_REG_DMACR			0x0000
-#define XILINX_VDMA_DMACR_DELAY_MAX		0xff
-#define XILINX_VDMA_DMACR_DELAY_SHIFT		24
-#define XILINX_VDMA_DMACR_FRAME_COUNT_MAX	0xff
-#define XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT	16
-#define XILINX_VDMA_DMACR_ERR_IRQ		BIT(14)
-#define XILINX_VDMA_DMACR_DLY_CNT_IRQ		BIT(13)
-#define XILINX_VDMA_DMACR_FRM_CNT_IRQ		BIT(12)
-#define XILINX_VDMA_DMACR_MASTER_SHIFT		8
-#define XILINX_VDMA_DMACR_FSYNCSRC_SHIFT	5
-#define XILINX_VDMA_DMACR_FRAMECNT_EN		BIT(4)
-#define XILINX_VDMA_DMACR_GENLOCK_EN		BIT(3)
-#define XILINX_VDMA_DMACR_RESET			BIT(2)
-#define XILINX_VDMA_DMACR_CIRC_EN		BIT(1)
-#define XILINX_VDMA_DMACR_RUNSTOP		BIT(0)
-#define XILINX_VDMA_DMACR_FSYNCSRC_MASK		GENMASK(6, 5)
+#define XILINX_DMA_REG_DMACR			0x0000
+#define XILINX_DMA_DMACR_DELAY_MAX		0xff
+#define XILINX_DMA_DMACR_DELAY_SHIFT		24
+#define XILINX_DMA_DMACR_FRAME_COUNT_MAX	0xff
+#define XILINX_DMA_DMACR_FRAME_COUNT_SHIFT	16
+#define XILINX_DMA_DMACR_ERR_IRQ		BIT(14)
+#define XILINX_DMA_DMACR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_DMA_DMACR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_DMA_DMACR_MASTER_SHIFT		8
+#define XILINX_DMA_DMACR_FSYNCSRC_SHIFT	5
+#define XILINX_DMA_DMACR_FRAMECNT_EN		BIT(4)
+#define XILINX_DMA_DMACR_GENLOCK_EN		BIT(3)
+#define XILINX_DMA_DMACR_RESET			BIT(2)
+#define XILINX_DMA_DMACR_CIRC_EN		BIT(1)
+#define XILINX_DMA_DMACR_RUNSTOP		BIT(0)
+#define XILINX_DMA_DMACR_FSYNCSRC_MASK		GENMASK(6, 5)
 
-#define XILINX_VDMA_REG_DMASR			0x0004
-#define XILINX_VDMA_DMASR_EOL_LATE_ERR		BIT(15)
-#define XILINX_VDMA_DMASR_ERR_IRQ		BIT(14)
-#define XILINX_VDMA_DMASR_DLY_CNT_IRQ		BIT(13)
-#define XILINX_VDMA_DMASR_FRM_CNT_IRQ		BIT(12)
-#define XILINX_VDMA_DMASR_SOF_LATE_ERR		BIT(11)
-#define XILINX_VDMA_DMASR_SG_DEC_ERR		BIT(10)
-#define XILINX_VDMA_DMASR_SG_SLV_ERR		BIT(9)
-#define XILINX_VDMA_DMASR_EOF_EARLY_ERR		BIT(8)
-#define XILINX_VDMA_DMASR_SOF_EARLY_ERR		BIT(7)
-#define XILINX_VDMA_DMASR_DMA_DEC_ERR		BIT(6)
-#define XILINX_VDMA_DMASR_DMA_SLAVE_ERR		BIT(5)
-#define XILINX_VDMA_DMASR_DMA_INT_ERR		BIT(4)
-#define XILINX_VDMA_DMASR_IDLE			BIT(1)
-#define XILINX_VDMA_DMASR_HALTED		BIT(0)
-#define XILINX_VDMA_DMASR_DELAY_MASK		GENMASK(31, 24)
-#define XILINX_VDMA_DMASR_FRAME_COUNT_MASK	GENMASK(23, 16)
+#define XILINX_DMA_REG_DMASR			0x0004
+#define XILINX_DMA_DMASR_EOL_LATE_ERR		BIT(15)
+#define XILINX_DMA_DMASR_ERR_IRQ		BIT(14)
+#define XILINX_DMA_DMASR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_DMA_DMASR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_DMA_DMASR_SOF_LATE_ERR		BIT(11)
+#define XILINX_DMA_DMASR_SG_DEC_ERR		BIT(10)
+#define XILINX_DMA_DMASR_SG_SLV_ERR		BIT(9)
+#define XILINX_DMA_DMASR_EOF_EARLY_ERR		BIT(8)
+#define XILINX_DMA_DMASR_SOF_EARLY_ERR		BIT(7)
+#define XILINX_DMA_DMASR_DMA_DEC_ERR		BIT(6)
+#define XILINX_DMA_DMASR_DMA_SLAVE_ERR		BIT(5)
+#define XILINX_DMA_DMASR_DMA_INT_ERR		BIT(4)
+#define XILINX_DMA_DMASR_IDLE			BIT(1)
+#define XILINX_DMA_DMASR_HALTED		BIT(0)
+#define XILINX_DMA_DMASR_DELAY_MASK		GENMASK(31, 24)
+#define XILINX_DMA_DMASR_FRAME_COUNT_MASK	GENMASK(23, 16)
 
-#define XILINX_VDMA_REG_CURDESC			0x0008
-#define XILINX_VDMA_REG_TAILDESC		0x0010
-#define XILINX_VDMA_REG_REG_INDEX		0x0014
-#define XILINX_VDMA_REG_FRMSTORE		0x0018
-#define XILINX_VDMA_REG_THRESHOLD		0x001c
-#define XILINX_VDMA_REG_FRMPTR_STS		0x0024
-#define XILINX_VDMA_REG_PARK_PTR		0x0028
-#define XILINX_VDMA_PARK_PTR_WR_REF_SHIFT	8
-#define XILINX_VDMA_PARK_PTR_RD_REF_SHIFT	0
-#define XILINX_VDMA_REG_VDMA_VERSION		0x002c
+#define XILINX_DMA_REG_CURDESC			0x0008
+#define XILINX_DMA_REG_TAILDESC		0x0010
+#define XILINX_DMA_REG_REG_INDEX		0x0014
+#define XILINX_DMA_REG_FRMSTORE		0x0018
+#define XILINX_DMA_REG_THRESHOLD		0x001c
+#define XILINX_DMA_REG_FRMPTR_STS		0x0024
+#define XILINX_DMA_REG_PARK_PTR		0x0028
+#define XILINX_DMA_PARK_PTR_WR_REF_SHIFT	8
+#define XILINX_DMA_PARK_PTR_RD_REF_SHIFT	0
+#define XILINX_DMA_REG_VDMA_VERSION		0x002c
 
 /* Register Direct Mode Registers */
-#define XILINX_VDMA_REG_VSIZE			0x0000
-#define XILINX_VDMA_REG_HSIZE			0x0004
+#define XILINX_DMA_REG_VSIZE			0x0000
+#define XILINX_DMA_REG_HSIZE			0x0004
 
-#define XILINX_VDMA_REG_FRMDLY_STRIDE		0x0008
-#define XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT	24
-#define XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT	0
+#define XILINX_DMA_REG_FRMDLY_STRIDE		0x0008
+#define XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT	24
+#define XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT	0
 
 #define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
+#define XILINX_VDMA_REG_START_ADDRESS_64(n)	(0x000c + 8 * (n))
 
 /* HW specific definitions */
-#define XILINX_VDMA_MAX_CHANS_PER_DEVICE	0x2
+#define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x2
 
-#define XILINX_VDMA_DMAXR_ALL_IRQ_MASK	\
-		(XILINX_VDMA_DMASR_FRM_CNT_IRQ | \
-		 XILINX_VDMA_DMASR_DLY_CNT_IRQ | \
-		 XILINX_VDMA_DMASR_ERR_IRQ)
+#define XILINX_DMA_DMAXR_ALL_IRQ_MASK	\
+		(XILINX_DMA_DMASR_FRM_CNT_IRQ | \
+		 XILINX_DMA_DMASR_DLY_CNT_IRQ | \
+		 XILINX_DMA_DMASR_ERR_IRQ)
 
-#define XILINX_VDMA_DMASR_ALL_ERR_MASK	\
-		(XILINX_VDMA_DMASR_EOL_LATE_ERR | \
-		 XILINX_VDMA_DMASR_SOF_LATE_ERR | \
-		 XILINX_VDMA_DMASR_SG_DEC_ERR | \
-		 XILINX_VDMA_DMASR_SG_SLV_ERR | \
-		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
-		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
-		 XILINX_VDMA_DMASR_DMA_DEC_ERR | \
-		 XILINX_VDMA_DMASR_DMA_SLAVE_ERR | \
-		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+#define XILINX_DMA_DMASR_ALL_ERR_MASK	\
+		(XILINX_DMA_DMASR_EOL_LATE_ERR | \
+		 XILINX_DMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_DMA_DMASR_SG_DEC_ERR | \
+		 XILINX_DMA_DMASR_SG_SLV_ERR | \
+		 XILINX_DMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_DMA_DEC_ERR | \
+		 XILINX_DMA_DMASR_DMA_SLAVE_ERR | \
+		 XILINX_DMA_DMASR_DMA_INT_ERR)
 
 /*
  * Recoverable errors are DMA Internal error, SOF Early, EOF Early
  * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC
  * is enabled in the h/w system.
  */
-#define XILINX_VDMA_DMASR_ERR_RECOVER_MASK	\
-		(XILINX_VDMA_DMASR_SOF_LATE_ERR | \
-		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
-		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
-		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+#define XILINX_DMA_DMASR_ERR_RECOVER_MASK	\
+		(XILINX_DMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_DMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_DMA_INT_ERR)
 
 /* Axi VDMA Flush on Fsync bits */
-#define XILINX_VDMA_FLUSH_S2MM		3
-#define XILINX_VDMA_FLUSH_MM2S		2
-#define XILINX_VDMA_FLUSH_BOTH		1
+#define XILINX_DMA_FLUSH_S2MM		3
+#define XILINX_DMA_FLUSH_MM2S		2
+#define XILINX_DMA_FLUSH_BOTH		1
 
 /* Delay loop counter to prevent hardware failure */
-#define XILINX_VDMA_LOOP_COUNT		1000000
+#define XILINX_DMA_LOOP_COUNT		1000000
+
+/* AXI DMA Specific Registers/Offsets */
+#define XILINX_DMA_REG_SRCDSTADDR	0x18
+#define XILINX_DMA_REG_BTT		0x28
+
+/* AXI DMA Specific Masks/Bit fields */
+#define XILINX_DMA_MAX_TRANS_LEN	GENMASK(22, 0)
+#define XILINX_DMA_CR_COALESCE_MAX	GENMASK(23, 16)
+#define XILINX_DMA_CR_COALESCE_SHIFT	16
+#define XILINX_DMA_BD_SOP		BIT(27)
+#define XILINX_DMA_BD_EOP		BIT(26)
+#define XILINX_DMA_COALESCE_MAX		255
+#define XILINX_DMA_NUM_APP_WORDS	5
+
+/* AXI CDMA Specific Registers/Offsets */
+#define XILINX_CDMA_REG_SRCADDR		0x18
+#define XILINX_CDMA_REG_DSTADDR		0x20
+
+/* AXI CDMA Specific Masks */
+#define XILINX_CDMA_CR_SGMODE          BIT(3)
 
 /**
  * struct xilinx_vdma_desc_hw - Hardware Descriptor
  * @next_desc: Next Descriptor Pointer @0x00
  * @pad1: Reserved @0x04
  * @buf_addr: Buffer address @0x08
- * @pad2: Reserved @0x0C
+ * @buf_addr_msb: MSB of Buffer address @0x0C
  * @vsize: Vertical Size @0x10
  * @hsize: Horizontal Size @0x14
  * @stride: Number of bytes between the first
@@ -154,13 +185,59 @@
 	u32 next_desc;
 	u32 pad1;
 	u32 buf_addr;
-	u32 pad2;
+	u32 buf_addr_msb;
 	u32 vsize;
 	u32 hsize;
 	u32 stride;
 } __aligned(64);
 
 /**
+ * struct xilinx_axidma_desc_hw - Hardware Descriptor for AXI DMA
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @pad1: Reserved @0x04
+ * @buf_addr: Buffer address @0x08
+ * @pad2: Reserved @0x0C
+ * @pad3: Reserved @0x10
+ * @pad4: Reserved @0x14
+ * @control: Control field @0x18
+ * @status: Status field @0x1C
+ * @app: APP Fields @0x20 - 0x30
+ */
+struct xilinx_axidma_desc_hw {
+	u32 next_desc;
+	u32 pad1;
+	u32 buf_addr;
+	u32 pad2;
+	u32 pad3;
+	u32 pad4;
+	u32 control;
+	u32 status;
+	u32 app[XILINX_DMA_NUM_APP_WORDS];
+} __aligned(64);
+
+/**
+ * struct xilinx_cdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @pad1: Reserved @0x04
+ * @src_addr: Source address @0x08
+ * @pad2: Reserved @0x0C
+ * @dest_addr: Destination address @0x10
+ * @pad3: Reserved @0x14
+ * @control: Control field @0x18
+ * @status: Status field @0x1C
+ */
+struct xilinx_cdma_desc_hw {
+	u32 next_desc;
+	u32 pad1;
+	u32 src_addr;
+	u32 pad2;
+	u32 dest_addr;
+	u32 pad3;
+	u32 control;
+	u32 status;
+} __aligned(64);
+
+/**
  * struct xilinx_vdma_tx_segment - Descriptor segment
  * @hw: Hardware descriptor
  * @node: Node in the descriptor segments list
@@ -173,19 +250,43 @@
 } __aligned(64);
 
 /**
- * struct xilinx_vdma_tx_descriptor - Per Transaction structure
+ * struct xilinx_axidma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_axidma_tx_segment {
+	struct xilinx_axidma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_cdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_cdma_tx_segment {
+	struct xilinx_cdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_dma_tx_descriptor - Per Transaction structure
  * @async_tx: Async transaction descriptor
  * @segments: TX segments list
  * @node: Node in the channel descriptors list
  */
-struct xilinx_vdma_tx_descriptor {
+struct xilinx_dma_tx_descriptor {
 	struct dma_async_tx_descriptor async_tx;
 	struct list_head segments;
 	struct list_head node;
 };
 
 /**
- * struct xilinx_vdma_chan - Driver specific VDMA channel structure
+ * struct xilinx_dma_chan - Driver specific DMA channel structure
  * @xdev: Driver specific device structure
  * @ctrl_offset: Control registers offset
  * @desc_offset: TX descriptor registers offset
@@ -207,9 +308,14 @@
  * @config: Device configuration info
  * @flush_on_fsync: Flush on Frame sync
  * @desc_pendingcount: Descriptor pending count
+ * @ext_addr: Indicates 64 bit addressing is supported by dma channel
+ * @desc_submitcount: Descriptor h/w submitted count
+ * @residue: Residue for AXI DMA
+ * @seg_v: Statically allocated segments base
+ * @start_transfer: DMA IP specific routine that starts a transfer
  */
-struct xilinx_vdma_chan {
-	struct xilinx_vdma_device *xdev;
+struct xilinx_dma_chan {
+	struct xilinx_dma_device *xdev;
 	u32 ctrl_offset;
 	u32 desc_offset;
 	spinlock_t lock;
@@ -230,73 +336,122 @@
 	struct xilinx_vdma_config config;
 	bool flush_on_fsync;
 	u32 desc_pendingcount;
+	bool ext_addr;
+	u32 desc_submitcount;
+	u32 residue;
+	struct xilinx_axidma_tx_segment *seg_v;
+	void (*start_transfer)(struct xilinx_dma_chan *chan);
+};
+
+struct xilinx_dma_config {
+	enum xdma_ip_type dmatype;
+	int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk,
+			struct clk **tx_clk, struct clk **txs_clk,
+			struct clk **rx_clk, struct clk **rxs_clk);
 };
 
 /**
- * struct xilinx_vdma_device - VDMA device structure
+ * struct xilinx_dma_device - DMA device structure
  * @regs: I/O mapped base address
  * @dev: Device Structure
  * @common: DMA device structure
- * @chan: Driver specific VDMA channel
+ * @chan: Driver specific DMA channel
  * @has_sg: Specifies whether Scatter-Gather is present or not
  * @flush_on_fsync: Flush on frame sync
+ * @ext_addr: Indicates 64 bit addressing is supported by dma device
+ * @pdev: Platform device structure pointer
+ * @dma_config: DMA config structure
+ * @axi_clk: DMA Axi4-lite interface clock
+ * @tx_clk: DMA mm2s clock
+ * @txs_clk: DMA mm2s stream clock
+ * @rx_clk: DMA s2mm clock
+ * @rxs_clk: DMA s2mm stream clock
  */
-struct xilinx_vdma_device {
+struct xilinx_dma_device {
 	void __iomem *regs;
 	struct device *dev;
 	struct dma_device common;
-	struct xilinx_vdma_chan *chan[XILINX_VDMA_MAX_CHANS_PER_DEVICE];
+	struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE];
 	bool has_sg;
 	u32 flush_on_fsync;
+	bool ext_addr;
+	struct platform_device  *pdev;
+	const struct xilinx_dma_config *dma_config;
+	struct clk *axi_clk;
+	struct clk *tx_clk;
+	struct clk *txs_clk;
+	struct clk *rx_clk;
+	struct clk *rxs_clk;
 };
 
 /* Macros */
 #define to_xilinx_chan(chan) \
-	container_of(chan, struct xilinx_vdma_chan, common)
-#define to_vdma_tx_descriptor(tx) \
-	container_of(tx, struct xilinx_vdma_tx_descriptor, async_tx)
-#define xilinx_vdma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \
+	container_of(chan, struct xilinx_dma_chan, common)
+#define to_dma_tx_descriptor(tx) \
+	container_of(tx, struct xilinx_dma_tx_descriptor, async_tx)
+#define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \
 	readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \
 			   cond, delay_us, timeout_us)
 
 /* IO accessors */
-static inline u32 vdma_read(struct xilinx_vdma_chan *chan, u32 reg)
+static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg)
 {
 	return ioread32(chan->xdev->regs + reg);
 }
 
-static inline void vdma_write(struct xilinx_vdma_chan *chan, u32 reg, u32 value)
+static inline void dma_write(struct xilinx_dma_chan *chan, u32 reg, u32 value)
 {
 	iowrite32(value, chan->xdev->regs + reg);
 }
 
-static inline void vdma_desc_write(struct xilinx_vdma_chan *chan, u32 reg,
+static inline void vdma_desc_write(struct xilinx_dma_chan *chan, u32 reg,
 				   u32 value)
 {
-	vdma_write(chan, chan->desc_offset + reg, value);
+	dma_write(chan, chan->desc_offset + reg, value);
 }
 
-static inline u32 vdma_ctrl_read(struct xilinx_vdma_chan *chan, u32 reg)
+static inline u32 dma_ctrl_read(struct xilinx_dma_chan *chan, u32 reg)
 {
-	return vdma_read(chan, chan->ctrl_offset + reg);
+	return dma_read(chan, chan->ctrl_offset + reg);
 }
 
-static inline void vdma_ctrl_write(struct xilinx_vdma_chan *chan, u32 reg,
+static inline void dma_ctrl_write(struct xilinx_dma_chan *chan, u32 reg,
 				   u32 value)
 {
-	vdma_write(chan, chan->ctrl_offset + reg, value);
+	dma_write(chan, chan->ctrl_offset + reg, value);
 }
 
-static inline void vdma_ctrl_clr(struct xilinx_vdma_chan *chan, u32 reg,
+static inline void dma_ctrl_clr(struct xilinx_dma_chan *chan, u32 reg,
 				 u32 clr)
 {
-	vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) & ~clr);
+	dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) & ~clr);
 }
 
-static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg,
+static inline void dma_ctrl_set(struct xilinx_dma_chan *chan, u32 reg,
 				 u32 set)
 {
-	vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) | set);
+	dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) | set);
+}
+
+/**
+ * vdma_desc_write_64 - 64-bit descriptor write
+ * @chan: Driver specific VDMA channel
+ * @reg: Register to write
+ * @value_lsb: lower address of the descriptor.
+ * @value_msb: upper address of the descriptor.
+ *
+ * The target register offset is not always a multiple of 64 bits
+ * (e.g. 0x5c), so the value is written as two separate 32-bit accesses
+ * instead of a single 64-bit register write.
+ */
+static inline void vdma_desc_write_64(struct xilinx_dma_chan *chan, u32 reg,
+				      u32 value_lsb, u32 value_msb)
+{
+	/* Write the lsb 32 bits */
+	writel(value_lsb, chan->xdev->regs + chan->desc_offset + reg);
+
+	/* Write the msb 32 bits */
+	writel(value_msb, chan->xdev->regs + chan->desc_offset + reg + 4);
 }
 
 /* -----------------------------------------------------------------------------
@@ -305,16 +460,37 @@
 
 /**
  * xilinx_vdma_alloc_tx_segment - Allocate transaction segment
- * @chan: Driver specific VDMA channel
+ * @chan: Driver specific DMA channel
  *
  * Return: The allocated segment on success and NULL on failure.
  */
 static struct xilinx_vdma_tx_segment *
-xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan)
+xilinx_vdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
 {
 	struct xilinx_vdma_tx_segment *segment;
 	dma_addr_t phys;
 
+	segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys);
+	if (!segment)
+		return NULL;
+
+	segment->phys = phys;
+
+	return segment;
+}
+
+/**
+ * xilinx_cdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_cdma_tx_segment *
+xilinx_cdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_cdma_tx_segment *segment;
+	dma_addr_t phys;
+
 	segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys);
 	if (!segment)
 		return NULL;
@@ -326,26 +502,70 @@
 }
 
 /**
- * xilinx_vdma_free_tx_segment - Free transaction segment
- * @chan: Driver specific VDMA channel
- * @segment: VDMA transaction segment
+ * xilinx_axidma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
  */
-static void xilinx_vdma_free_tx_segment(struct xilinx_vdma_chan *chan,
+static struct xilinx_axidma_tx_segment *
+xilinx_axidma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_axidma_tx_segment *segment;
+	dma_addr_t phys;
+
+	segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys);
+	if (!segment)
+		return NULL;
+
+	memset(segment, 0, sizeof(*segment));
+	segment->phys = phys;
+
+	return segment;
+}
+
+/**
+ * xilinx_dma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_dma_free_tx_segment(struct xilinx_dma_chan *chan,
+				struct xilinx_axidma_tx_segment *segment)
+{
+	dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_cdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_cdma_free_tx_segment(struct xilinx_dma_chan *chan,
+				struct xilinx_cdma_tx_segment *segment)
+{
+	dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_vdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_vdma_free_tx_segment(struct xilinx_dma_chan *chan,
 					struct xilinx_vdma_tx_segment *segment)
 {
 	dma_pool_free(chan->desc_pool, segment, segment->phys);
 }
 
 /**
- * xilinx_vdma_tx_descriptor - Allocate transaction descriptor
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_alloc_tx_descriptor - Allocate transaction descriptor
+ * @chan: Driver specific DMA channel
  *
  * Return: The allocated descriptor on success and NULL on failure.
  */
-static struct xilinx_vdma_tx_descriptor *
-xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan)
+static struct xilinx_dma_tx_descriptor *
+xilinx_dma_alloc_tx_descriptor(struct xilinx_dma_chan *chan)
 {
-	struct xilinx_vdma_tx_descriptor *desc;
+	struct xilinx_dma_tx_descriptor *desc;
 
 	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
 	if (!desc)
@@ -357,22 +577,38 @@
 }
 
 /**
- * xilinx_vdma_free_tx_descriptor - Free transaction descriptor
- * @chan: Driver specific VDMA channel
- * @desc: VDMA transaction descriptor
+ * xilinx_dma_free_tx_descriptor - Free transaction descriptor
+ * @chan: Driver specific DMA channel
+ * @desc: DMA transaction descriptor
  */
 static void
-xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan,
-			       struct xilinx_vdma_tx_descriptor *desc)
+xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan,
+			       struct xilinx_dma_tx_descriptor *desc)
 {
 	struct xilinx_vdma_tx_segment *segment, *next;
+	struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next;
+	struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next;
 
 	if (!desc)
 		return;
 
-	list_for_each_entry_safe(segment, next, &desc->segments, node) {
-		list_del(&segment->node);
-		xilinx_vdma_free_tx_segment(chan, segment);
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		list_for_each_entry_safe(segment, next, &desc->segments, node) {
+			list_del(&segment->node);
+			xilinx_vdma_free_tx_segment(chan, segment);
+		}
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		list_for_each_entry_safe(cdma_segment, cdma_next,
+					 &desc->segments, node) {
+			list_del(&cdma_segment->node);
+			xilinx_cdma_free_tx_segment(chan, cdma_segment);
+		}
+	} else {
+		list_for_each_entry_safe(axidma_segment, axidma_next,
+					 &desc->segments, node) {
+			list_del(&axidma_segment->node);
+			xilinx_dma_free_tx_segment(chan, axidma_segment);
+		}
 	}
 
 	kfree(desc);
@@ -381,60 +617,62 @@
 /* Required functions */
 
 /**
- * xilinx_vdma_free_desc_list - Free descriptors list
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_free_desc_list - Free descriptors list
+ * @chan: Driver specific DMA channel
  * @list: List to parse and delete the descriptor
  */
-static void xilinx_vdma_free_desc_list(struct xilinx_vdma_chan *chan,
+static void xilinx_dma_free_desc_list(struct xilinx_dma_chan *chan,
 					struct list_head *list)
 {
-	struct xilinx_vdma_tx_descriptor *desc, *next;
+	struct xilinx_dma_tx_descriptor *desc, *next;
 
 	list_for_each_entry_safe(desc, next, list, node) {
 		list_del(&desc->node);
-		xilinx_vdma_free_tx_descriptor(chan, desc);
+		xilinx_dma_free_tx_descriptor(chan, desc);
 	}
 }
 
 /**
- * xilinx_vdma_free_descriptors - Free channel descriptors
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_free_descriptors - Free channel descriptors
+ * @chan: Driver specific DMA channel
  */
-static void xilinx_vdma_free_descriptors(struct xilinx_vdma_chan *chan)
+static void xilinx_dma_free_descriptors(struct xilinx_dma_chan *chan)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&chan->lock, flags);
 
-	xilinx_vdma_free_desc_list(chan, &chan->pending_list);
-	xilinx_vdma_free_desc_list(chan, &chan->done_list);
-	xilinx_vdma_free_desc_list(chan, &chan->active_list);
+	xilinx_dma_free_desc_list(chan, &chan->pending_list);
+	xilinx_dma_free_desc_list(chan, &chan->done_list);
+	xilinx_dma_free_desc_list(chan, &chan->active_list);
 
 	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
 /**
- * xilinx_vdma_free_chan_resources - Free channel resources
+ * xilinx_dma_free_chan_resources - Free channel resources
  * @dchan: DMA channel
  */
-static void xilinx_vdma_free_chan_resources(struct dma_chan *dchan)
+static void xilinx_dma_free_chan_resources(struct dma_chan *dchan)
 {
-	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 
 	dev_dbg(chan->dev, "Free all channel resources.\n");
 
-	xilinx_vdma_free_descriptors(chan);
+	xilinx_dma_free_descriptors(chan);
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+		xilinx_dma_free_tx_segment(chan, chan->seg_v);
 	dma_pool_destroy(chan->desc_pool);
 	chan->desc_pool = NULL;
 }
 
 /**
- * xilinx_vdma_chan_desc_cleanup - Clean channel descriptors
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_chan_desc_cleanup - Clean channel descriptors
+ * @chan: Driver specific DMA channel
  */
-static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan)
+static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan)
 {
-	struct xilinx_vdma_tx_descriptor *desc, *next;
+	struct xilinx_dma_tx_descriptor *desc, *next;
 	unsigned long flags;
 
 	spin_lock_irqsave(&chan->lock, flags);
@@ -457,32 +695,32 @@
 
 		/* Run any dependencies, then free the descriptor */
 		dma_run_dependencies(&desc->async_tx);
-		xilinx_vdma_free_tx_descriptor(chan, desc);
+		xilinx_dma_free_tx_descriptor(chan, desc);
 	}
 
 	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
 /**
- * xilinx_vdma_do_tasklet - Schedule completion tasklet
- * @data: Pointer to the Xilinx VDMA channel structure
+ * xilinx_dma_do_tasklet - Schedule completion tasklet
+ * @data: Pointer to the Xilinx DMA channel structure
  */
-static void xilinx_vdma_do_tasklet(unsigned long data)
+static void xilinx_dma_do_tasklet(unsigned long data)
 {
-	struct xilinx_vdma_chan *chan = (struct xilinx_vdma_chan *)data;
+	struct xilinx_dma_chan *chan = (struct xilinx_dma_chan *)data;
 
-	xilinx_vdma_chan_desc_cleanup(chan);
+	xilinx_dma_chan_desc_cleanup(chan);
 }
 
 /**
- * xilinx_vdma_alloc_chan_resources - Allocate channel resources
+ * xilinx_dma_alloc_chan_resources - Allocate channel resources
  * @dchan: DMA channel
  *
  * Return: '0' on success and failure value on error
  */
-static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan)
+static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
 {
-	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 
 	/* Has this channel already been allocated? */
 	if (chan->desc_pool)
@@ -492,10 +730,26 @@
 	 * We need the descriptor to be aligned to 64bytes
 	 * for meeting Xilinx VDMA specification requirement.
 	 */
-	chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool",
-				chan->dev,
-				sizeof(struct xilinx_vdma_tx_segment),
-				__alignof__(struct xilinx_vdma_tx_segment), 0);
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		chan->desc_pool = dma_pool_create("xilinx_dma_desc_pool",
+				   chan->dev,
+				   sizeof(struct xilinx_axidma_tx_segment),
+				   __alignof__(struct xilinx_axidma_tx_segment),
+				   0);
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool",
+				   chan->dev,
+				   sizeof(struct xilinx_cdma_tx_segment),
+				   __alignof__(struct xilinx_cdma_tx_segment),
+				   0);
+	} else {
+		chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool",
+				     chan->dev,
+				     sizeof(struct xilinx_vdma_tx_segment),
+				     __alignof__(struct xilinx_vdma_tx_segment),
+				     0);
+	}
+
 	if (!chan->desc_pool) {
 		dev_err(chan->dev,
 			"unable to allocate channel %d descriptor pool\n",
@@ -503,110 +757,160 @@
 		return -ENOMEM;
 	}
 
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+		/*
+		 * For AXI DMA case after submitting a pending_list, keep
+		 * an extra segment allocated so that the "next descriptor"
+		 * pointer on the tail descriptor always points to a
+		 * valid descriptor, even when paused after reaching taildesc.
+		 * This way, it is possible to issue additional
+		 * transfers without halting and restarting the channel.
+		 */
+		chan->seg_v = xilinx_axidma_alloc_tx_segment(chan);
+
 	dma_cookie_init(dchan);
+
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		/* For AXI DMA resetting one channel will reset the
+		 * other channel as well, so enable the interrupts here.
+		 */
+		dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+			      XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+	}
+
+	if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg)
+		dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+			     XILINX_CDMA_CR_SGMODE);
+
 	return 0;
 }
 
 /**
- * xilinx_vdma_tx_status - Get VDMA transaction status
+ * xilinx_dma_tx_status - Get DMA transaction status
  * @dchan: DMA channel
  * @cookie: Transaction identifier
  * @txstate: Transaction state
  *
  * Return: DMA transaction status
  */
-static enum dma_status xilinx_vdma_tx_status(struct dma_chan *dchan,
+static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan,
 					dma_cookie_t cookie,
 					struct dma_tx_state *txstate)
 {
-	return dma_cookie_status(dchan, cookie, txstate);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_axidma_tx_segment *segment;
+	struct xilinx_axidma_desc_hw *hw;
+	enum dma_status ret;
+	unsigned long flags;
+	u32 residue = 0;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		spin_lock_irqsave(&chan->lock, flags);
+
+		desc = list_last_entry(&chan->active_list,
+				       struct xilinx_dma_tx_descriptor, node);
+		if (chan->has_sg) {
+			list_for_each_entry(segment, &desc->segments, node) {
+				hw = &segment->hw;
+				residue += (hw->control - hw->status) &
+					   XILINX_DMA_MAX_TRANS_LEN;
+			}
+		}
+		spin_unlock_irqrestore(&chan->lock, flags);
+
+		chan->residue = residue;
+		dma_set_residue(txstate, chan->residue);
+	}
+
+	return ret;
 }
 
 /**
- * xilinx_vdma_is_running - Check if VDMA channel is running
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_is_running - Check if DMA channel is running
+ * @chan: Driver specific DMA channel
  *
  * Return: '1' if running, '0' if not.
  */
-static bool xilinx_vdma_is_running(struct xilinx_vdma_chan *chan)
+static bool xilinx_dma_is_running(struct xilinx_dma_chan *chan)
 {
-	return !(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
-		 XILINX_VDMA_DMASR_HALTED) &&
-		(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
-		 XILINX_VDMA_DMACR_RUNSTOP);
+	return !(dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
+		 XILINX_DMA_DMASR_HALTED) &&
+		(dma_ctrl_read(chan, XILINX_DMA_REG_DMACR) &
+		 XILINX_DMA_DMACR_RUNSTOP);
 }
 
 /**
- * xilinx_vdma_is_idle - Check if VDMA channel is idle
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_is_idle - Check if DMA channel is idle
+ * @chan: Driver specific DMA channel
  *
  * Return: '1' if idle, '0' if not.
  */
-static bool xilinx_vdma_is_idle(struct xilinx_vdma_chan *chan)
+static bool xilinx_dma_is_idle(struct xilinx_dma_chan *chan)
 {
-	return vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
-		XILINX_VDMA_DMASR_IDLE;
+	return dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
+		XILINX_DMA_DMASR_IDLE;
 }
 
 /**
- * xilinx_vdma_halt - Halt VDMA channel
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_halt - Halt DMA channel
+ * @chan: Driver specific DMA channel
  */
-static void xilinx_vdma_halt(struct xilinx_vdma_chan *chan)
+static void xilinx_dma_halt(struct xilinx_dma_chan *chan)
 {
 	int err;
 	u32 val;
 
-	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+	dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
 
 	/* Wait for the hardware to halt */
-	err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMASR, val,
-				      (val & XILINX_VDMA_DMASR_HALTED), 0,
-				      XILINX_VDMA_LOOP_COUNT);
+	err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				      (val & XILINX_DMA_DMASR_HALTED), 0,
+				      XILINX_DMA_LOOP_COUNT);
 
 	if (err) {
 		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
-			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
 		chan->err = true;
 	}
-
-	return;
 }
 
 /**
- * xilinx_vdma_start - Start VDMA channel
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_start - Start DMA channel
+ * @chan: Driver specific DMA channel
  */
-static void xilinx_vdma_start(struct xilinx_vdma_chan *chan)
+static void xilinx_dma_start(struct xilinx_dma_chan *chan)
 {
 	int err;
 	u32 val;
 
-	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+	dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
 
 	/* Wait for the hardware to start */
-	err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMASR, val,
-				      !(val & XILINX_VDMA_DMASR_HALTED), 0,
-				      XILINX_VDMA_LOOP_COUNT);
+	err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				      !(val & XILINX_DMA_DMASR_HALTED), 0,
+				      XILINX_DMA_LOOP_COUNT);
 
 	if (err) {
 		dev_err(chan->dev, "Cannot start channel %p: %x\n",
-			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
 
 		chan->err = true;
 	}
-
-	return;
 }
 
 /**
  * xilinx_vdma_start_transfer - Starts VDMA transfer
  * @chan: Driver specific channel struct pointer
  */
-static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan)
+static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 {
 	struct xilinx_vdma_config *config = &chan->config;
-	struct xilinx_vdma_tx_descriptor *desc, *tail_desc;
+	struct xilinx_dma_tx_descriptor *desc, *tail_desc;
 	u32 reg;
 	struct xilinx_vdma_tx_segment *tail_segment;
 
@@ -618,16 +922,16 @@
 		return;
 
 	desc = list_first_entry(&chan->pending_list,
-				struct xilinx_vdma_tx_descriptor, node);
+				struct xilinx_dma_tx_descriptor, node);
 	tail_desc = list_last_entry(&chan->pending_list,
-				    struct xilinx_vdma_tx_descriptor, node);
+				    struct xilinx_dma_tx_descriptor, node);
 
 	tail_segment = list_last_entry(&tail_desc->segments,
 				       struct xilinx_vdma_tx_segment, node);
 
 	/* If it is SG mode and hardware is busy, cannot submit */
-	if (chan->has_sg && xilinx_vdma_is_running(chan) &&
-	    !xilinx_vdma_is_idle(chan)) {
+	if (chan->has_sg && xilinx_dma_is_running(chan) &&
+	    !xilinx_dma_is_idle(chan)) {
 		dev_dbg(chan->dev, "DMA controller still busy\n");
 		return;
 	}
@@ -637,19 +941,19 @@
 	 * done, start new transfers
 	 */
 	if (chan->has_sg)
-		vdma_ctrl_write(chan, XILINX_VDMA_REG_CURDESC,
+		dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
 				desc->async_tx.phys);
 
 	/* Configure the hardware using info in the config structure */
-	reg = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
 
 	if (config->frm_cnt_en)
-		reg |= XILINX_VDMA_DMACR_FRAMECNT_EN;
+		reg |= XILINX_DMA_DMACR_FRAMECNT_EN;
 	else
-		reg &= ~XILINX_VDMA_DMACR_FRAMECNT_EN;
+		reg &= ~XILINX_DMA_DMACR_FRAMECNT_EN;
 
 	/* Configure channel to allow number frame buffers */
-	vdma_ctrl_write(chan, XILINX_VDMA_REG_FRMSTORE,
+	dma_ctrl_write(chan, XILINX_DMA_REG_FRMSTORE,
 			chan->desc_pendingcount);
 
 	/*
@@ -657,45 +961,53 @@
 	 * In direct register mode, if not parking, enable circular mode
 	 */
 	if (chan->has_sg || !config->park)
-		reg |= XILINX_VDMA_DMACR_CIRC_EN;
+		reg |= XILINX_DMA_DMACR_CIRC_EN;
 
 	if (config->park)
-		reg &= ~XILINX_VDMA_DMACR_CIRC_EN;
+		reg &= ~XILINX_DMA_DMACR_CIRC_EN;
 
-	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, reg);
+	dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
 
 	if (config->park && (config->park_frm >= 0) &&
 			(config->park_frm < chan->num_frms)) {
 		if (chan->direction == DMA_MEM_TO_DEV)
-			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+			dma_write(chan, XILINX_DMA_REG_PARK_PTR,
 				config->park_frm <<
-					XILINX_VDMA_PARK_PTR_RD_REF_SHIFT);
+					XILINX_DMA_PARK_PTR_RD_REF_SHIFT);
 		else
-			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+			dma_write(chan, XILINX_DMA_REG_PARK_PTR,
 				config->park_frm <<
-					XILINX_VDMA_PARK_PTR_WR_REF_SHIFT);
+					XILINX_DMA_PARK_PTR_WR_REF_SHIFT);
 	}
 
 	/* Start the hardware */
-	xilinx_vdma_start(chan);
+	xilinx_dma_start(chan);
 
 	if (chan->err)
 		return;
 
 	/* Start the transfer */
 	if (chan->has_sg) {
-		vdma_ctrl_write(chan, XILINX_VDMA_REG_TAILDESC,
+		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
 				tail_segment->phys);
 	} else {
 		struct xilinx_vdma_tx_segment *segment, *last = NULL;
 		int i = 0;
 
-		list_for_each_entry(desc, &chan->pending_list, node) {
-			segment = list_first_entry(&desc->segments,
-					   struct xilinx_vdma_tx_segment, node);
-			vdma_desc_write(chan,
+		if (chan->desc_submitcount < chan->num_frms)
+			i = chan->desc_submitcount;
+
+		list_for_each_entry(segment, &desc->segments, node) {
+			if (chan->ext_addr)
+				vdma_desc_write_64(chan,
+					XILINX_VDMA_REG_START_ADDRESS_64(i++),
+					segment->hw.buf_addr,
+					segment->hw.buf_addr_msb);
+			else
+				vdma_desc_write(chan,
 					XILINX_VDMA_REG_START_ADDRESS(i++),
 					segment->hw.buf_addr);
+
 			last = segment;
 		}
 
@@ -703,10 +1015,79 @@
 			return;
 
 		/* HW expects these parameters to be same for one transaction */
-		vdma_desc_write(chan, XILINX_VDMA_REG_HSIZE, last->hw.hsize);
-		vdma_desc_write(chan, XILINX_VDMA_REG_FRMDLY_STRIDE,
+		vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize);
+		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
 				last->hw.stride);
-		vdma_desc_write(chan, XILINX_VDMA_REG_VSIZE, last->hw.vsize);
+		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
+	}
+
+	if (!chan->has_sg) {
+		list_del(&desc->node);
+		list_add_tail(&desc->node, &chan->active_list);
+		chan->desc_submitcount++;
+		chan->desc_pendingcount--;
+		if (chan->desc_submitcount == chan->num_frms)
+			chan->desc_submitcount = 0;
+	} else {
+		list_splice_tail_init(&chan->pending_list, &chan->active_list);
+		chan->desc_pendingcount = 0;
+	}
+}
+
+/**
+ * xilinx_cdma_start_transfer - Starts cdma transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+	struct xilinx_cdma_tx_segment *tail_segment;
+	u32 ctrl_reg = dma_read(chan, XILINX_DMA_REG_DMACR);
+
+	if (chan->err)
+		return;
+
+	if (list_empty(&chan->pending_list))
+		return;
+
+	head_desc = list_first_entry(&chan->pending_list,
+				     struct xilinx_dma_tx_descriptor, node);
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+	tail_segment = list_last_entry(&tail_desc->segments,
+				       struct xilinx_cdma_tx_segment, node);
+
+	if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) {
+		ctrl_reg &= ~XILINX_DMA_CR_COALESCE_MAX;
+		ctrl_reg |= chan->desc_pendingcount <<
+				XILINX_DMA_CR_COALESCE_SHIFT;
+		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, ctrl_reg);
+	}
+
+	if (chan->has_sg) {
+		dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
+			   head_desc->async_tx.phys);
+
+		/* Update tail ptr register which will start the transfer */
+		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
+			       tail_segment->phys);
+	} else {
+		/* In simple mode */
+		struct xilinx_cdma_tx_segment *segment;
+		struct xilinx_cdma_desc_hw *hw;
+
+		segment = list_first_entry(&head_desc->segments,
+					   struct xilinx_cdma_tx_segment,
+					   node);
+
+		hw = &segment->hw;
+
+		dma_ctrl_write(chan, XILINX_CDMA_REG_SRCADDR, hw->src_addr);
+		dma_ctrl_write(chan, XILINX_CDMA_REG_DSTADDR, hw->dest_addr);
+
+		/* Start the transfer */
+		dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
+				hw->control & XILINX_DMA_MAX_TRANS_LEN);
 	}
 
 	list_splice_tail_init(&chan->pending_list, &chan->active_list);
@@ -714,28 +1095,113 @@
 }
 
 /**
- * xilinx_vdma_issue_pending - Issue pending transactions
+ * xilinx_dma_start_transfer - Starts DMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+	struct xilinx_axidma_tx_segment *tail_segment, *old_head, *new_head;
+	u32 reg;
+
+	if (chan->err)
+		return;
+
+	if (list_empty(&chan->pending_list))
+		return;
+
+	/* If it is SG mode and hardware is busy, cannot submit */
+	if (chan->has_sg && xilinx_dma_is_running(chan) &&
+	    !xilinx_dma_is_idle(chan)) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		return;
+	}
+
+	head_desc = list_first_entry(&chan->pending_list,
+				     struct xilinx_dma_tx_descriptor, node);
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+	tail_segment = list_last_entry(&tail_desc->segments,
+				       struct xilinx_axidma_tx_segment, node);
+
+	old_head = list_first_entry(&head_desc->segments,
+				struct xilinx_axidma_tx_segment, node);
+	new_head = chan->seg_v;
+	/* Copy Buffer Descriptor fields. */
+	new_head->hw = old_head->hw;
+
+	/* Swap and save new reserve */
+	list_replace_init(&old_head->node, &new_head->node);
+	chan->seg_v = old_head;
+
+	tail_segment->hw.next_desc = chan->seg_v->phys;
+	head_desc->async_tx.phys = new_head->phys;
+
+	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+
+	if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) {
+		reg &= ~XILINX_DMA_CR_COALESCE_MAX;
+		reg |= chan->desc_pendingcount <<
+				  XILINX_DMA_CR_COALESCE_SHIFT;
+		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+	}
+
+	if (chan->has_sg)
+		dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
+			       head_desc->async_tx.phys);
+
+	xilinx_dma_start(chan);
+
+	if (chan->err)
+		return;
+
+	/* Start the transfer */
+	if (chan->has_sg) {
+		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
+			       tail_segment->phys);
+	} else {
+		struct xilinx_axidma_tx_segment *segment;
+		struct xilinx_axidma_desc_hw *hw;
+
+		segment = list_first_entry(&head_desc->segments,
+					   struct xilinx_axidma_tx_segment,
+					   node);
+		hw = &segment->hw;
+
+		dma_ctrl_write(chan, XILINX_DMA_REG_SRCDSTADDR, hw->buf_addr);
+
+		/* Start the transfer */
+		dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
+			       hw->control & XILINX_DMA_MAX_TRANS_LEN);
+	}
+
+	list_splice_tail_init(&chan->pending_list, &chan->active_list);
+	chan->desc_pendingcount = 0;
+}
+
+/**
+ * xilinx_dma_issue_pending - Issue pending transactions
  * @dchan: DMA channel
  */
-static void xilinx_vdma_issue_pending(struct dma_chan *dchan)
+static void xilinx_dma_issue_pending(struct dma_chan *dchan)
 {
-	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	unsigned long flags;
 
 	spin_lock_irqsave(&chan->lock, flags);
-	xilinx_vdma_start_transfer(chan);
+	chan->start_transfer(chan);
 	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
 /**
- * xilinx_vdma_complete_descriptor - Mark the active descriptor as complete
+ * xilinx_dma_complete_descriptor - Mark the active descriptor as complete
  * @chan : xilinx DMA channel
  *
  * CONTEXT: hardirq
  */
-static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan)
+static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan)
 {
-	struct xilinx_vdma_tx_descriptor *desc, *next;
+	struct xilinx_dma_tx_descriptor *desc, *next;
 
 	/* This function was invoked with lock held */
 	if (list_empty(&chan->active_list))
@@ -749,27 +1215,27 @@
 }
 
 /**
- * xilinx_vdma_reset - Reset VDMA channel
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_reset - Reset DMA channel
+ * @chan: Driver specific DMA channel
  *
  * Return: '0' on success and failure value on error
  */
-static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan)
+static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
 {
 	int err;
 	u32 tmp;
 
-	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RESET);
+	dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RESET);
 
 	/* Wait for the hardware to finish reset */
-	err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMACR, tmp,
-				      !(tmp & XILINX_VDMA_DMACR_RESET), 0,
-				      XILINX_VDMA_LOOP_COUNT);
+	err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
+				      !(tmp & XILINX_DMA_DMACR_RESET), 0,
+				      XILINX_DMA_LOOP_COUNT);
 
 	if (err) {
 		dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
-			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR),
-			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+			dma_ctrl_read(chan, XILINX_DMA_REG_DMACR),
+			dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
 		return -ETIMEDOUT;
 	}
 
@@ -779,48 +1245,48 @@
 }
 
 /**
- * xilinx_vdma_chan_reset - Reset VDMA channel and enable interrupts
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_chan_reset - Reset DMA channel and enable interrupts
+ * @chan: Driver specific DMA channel
  *
  * Return: '0' on success and failure value on error
  */
-static int xilinx_vdma_chan_reset(struct xilinx_vdma_chan *chan)
+static int xilinx_dma_chan_reset(struct xilinx_dma_chan *chan)
 {
 	int err;
 
 	/* Reset VDMA */
-	err = xilinx_vdma_reset(chan);
+	err = xilinx_dma_reset(chan);
 	if (err)
 		return err;
 
 	/* Enable interrupts */
-	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR,
-		      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+	dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+		      XILINX_DMA_DMAXR_ALL_IRQ_MASK);
 
 	return 0;
 }
 
 /**
- * xilinx_vdma_irq_handler - VDMA Interrupt handler
+ * xilinx_dma_irq_handler - DMA Interrupt handler
  * @irq: IRQ number
- * @data: Pointer to the Xilinx VDMA channel structure
+ * @data: Pointer to the Xilinx DMA channel structure
  *
  * Return: IRQ_HANDLED/IRQ_NONE
  */
-static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data)
+static irqreturn_t xilinx_dma_irq_handler(int irq, void *data)
 {
-	struct xilinx_vdma_chan *chan = data;
+	struct xilinx_dma_chan *chan = data;
 	u32 status;
 
 	/* Read the status and ack the interrupts. */
-	status = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR);
-	if (!(status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK))
+	status = dma_ctrl_read(chan, XILINX_DMA_REG_DMASR);
+	if (!(status & XILINX_DMA_DMAXR_ALL_IRQ_MASK))
 		return IRQ_NONE;
 
-	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
-			status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+	dma_ctrl_write(chan, XILINX_DMA_REG_DMASR,
+			status & XILINX_DMA_DMAXR_ALL_IRQ_MASK);
 
-	if (status & XILINX_VDMA_DMASR_ERR_IRQ) {
+	if (status & XILINX_DMA_DMASR_ERR_IRQ) {
 		/*
 		 * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the
 		 * error is recoverable, ignore it. Otherwise flag the error.
@@ -828,22 +1294,23 @@
 		 * Only recoverable errors can be cleared in the DMASR register,
 		 * make sure not to write to other error bits to 1.
 		 */
-		u32 errors = status & XILINX_VDMA_DMASR_ALL_ERR_MASK;
-		vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
-				errors & XILINX_VDMA_DMASR_ERR_RECOVER_MASK);
+		u32 errors = status & XILINX_DMA_DMASR_ALL_ERR_MASK;
+
+		dma_ctrl_write(chan, XILINX_DMA_REG_DMASR,
+				errors & XILINX_DMA_DMASR_ERR_RECOVER_MASK);
 
 		if (!chan->flush_on_fsync ||
-		    (errors & ~XILINX_VDMA_DMASR_ERR_RECOVER_MASK)) {
+		    (errors & ~XILINX_DMA_DMASR_ERR_RECOVER_MASK)) {
 			dev_err(chan->dev,
 				"Channel %p has errors %x, cdr %x tdr %x\n",
 				chan, errors,
-				vdma_ctrl_read(chan, XILINX_VDMA_REG_CURDESC),
-				vdma_ctrl_read(chan, XILINX_VDMA_REG_TAILDESC));
+				dma_ctrl_read(chan, XILINX_DMA_REG_CURDESC),
+				dma_ctrl_read(chan, XILINX_DMA_REG_TAILDESC));
 			chan->err = true;
 		}
 	}
 
-	if (status & XILINX_VDMA_DMASR_DLY_CNT_IRQ) {
+	if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) {
 		/*
 		 * Device takes too long to do the transfer when user requires
 		 * responsiveness.
@@ -851,10 +1318,10 @@
 		dev_dbg(chan->dev, "Inter-packet latency too long\n");
 	}
 
-	if (status & XILINX_VDMA_DMASR_FRM_CNT_IRQ) {
+	if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) {
 		spin_lock(&chan->lock);
-		xilinx_vdma_complete_descriptor(chan);
-		xilinx_vdma_start_transfer(chan);
+		xilinx_dma_complete_descriptor(chan);
+		chan->start_transfer(chan);
 		spin_unlock(&chan->lock);
 	}
 
@@ -867,11 +1334,13 @@
  * @chan: Driver specific dma channel
  * @desc: dma transaction descriptor
  */
-static void append_desc_queue(struct xilinx_vdma_chan *chan,
-			      struct xilinx_vdma_tx_descriptor *desc)
+static void append_desc_queue(struct xilinx_dma_chan *chan,
+			      struct xilinx_dma_tx_descriptor *desc)
 {
 	struct xilinx_vdma_tx_segment *tail_segment;
-	struct xilinx_vdma_tx_descriptor *tail_desc;
+	struct xilinx_dma_tx_descriptor *tail_desc;
+	struct xilinx_axidma_tx_segment *axidma_tail_segment;
+	struct xilinx_cdma_tx_segment *cdma_tail_segment;
 
 	if (list_empty(&chan->pending_list))
 		goto append;
@@ -881,10 +1350,23 @@
 	 * that already exists in memory.
 	 */
 	tail_desc = list_last_entry(&chan->pending_list,
-				    struct xilinx_vdma_tx_descriptor, node);
-	tail_segment = list_last_entry(&tail_desc->segments,
-				       struct xilinx_vdma_tx_segment, node);
-	tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+				    struct xilinx_dma_tx_descriptor, node);
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		tail_segment = list_last_entry(&tail_desc->segments,
+					       struct xilinx_vdma_tx_segment,
+					       node);
+		tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		cdma_tail_segment = list_last_entry(&tail_desc->segments,
+						struct xilinx_cdma_tx_segment,
+						node);
+		cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	} else {
+		axidma_tail_segment = list_last_entry(&tail_desc->segments,
+					       struct xilinx_axidma_tx_segment,
+					       node);
+		axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	}
 
 	/*
 	 * Add the software descriptor and all children to the list
@@ -894,22 +1376,23 @@
 	list_add_tail(&desc->node, &chan->pending_list);
 	chan->desc_pendingcount++;
 
-	if (unlikely(chan->desc_pendingcount > chan->num_frms)) {
+	if (chan->has_sg && (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA)
+	    && unlikely(chan->desc_pendingcount > chan->num_frms)) {
 		dev_dbg(chan->dev, "desc pendingcount is too high\n");
 		chan->desc_pendingcount = chan->num_frms;
 	}
 }
 
 /**
- * xilinx_vdma_tx_submit - Submit DMA transaction
+ * xilinx_dma_tx_submit - Submit DMA transaction
  * @tx: Async transaction descriptor
  *
  * Return: cookie value on success and failure value on error
  */
-static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx)
+static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-	struct xilinx_vdma_tx_descriptor *desc = to_vdma_tx_descriptor(tx);
-	struct xilinx_vdma_chan *chan = to_xilinx_chan(tx->chan);
+	struct xilinx_dma_tx_descriptor *desc = to_dma_tx_descriptor(tx);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(tx->chan);
 	dma_cookie_t cookie;
 	unsigned long flags;
 	int err;
@@ -919,7 +1402,7 @@
 		 * If reset fails, need to hard reset the system.
 		 * Channel is no longer functional
 		 */
-		err = xilinx_vdma_chan_reset(chan);
+		err = xilinx_dma_chan_reset(chan);
 		if (err < 0)
 			return err;
 	}
@@ -950,8 +1433,8 @@
 				 struct dma_interleaved_template *xt,
 				 unsigned long flags)
 {
-	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
-	struct xilinx_vdma_tx_descriptor *desc;
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
 	struct xilinx_vdma_tx_segment *segment, *prev = NULL;
 	struct xilinx_vdma_desc_hw *hw;
 
@@ -965,12 +1448,12 @@
 		return NULL;
 
 	/* Allocate a transaction descriptor. */
-	desc = xilinx_vdma_alloc_tx_descriptor(chan);
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
 	if (!desc)
 		return NULL;
 
 	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
-	desc->async_tx.tx_submit = xilinx_vdma_tx_submit;
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
 	async_tx_ack(&desc->async_tx);
 
 	/* Allocate the link descriptor from DMA pool */
@@ -983,14 +1466,25 @@
 	hw->vsize = xt->numf;
 	hw->hsize = xt->sgl[0].size;
 	hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) <<
-			XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT;
+			XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT;
 	hw->stride |= chan->config.frm_dly <<
-			XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
+			XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
 
-	if (xt->dir != DMA_MEM_TO_DEV)
-		hw->buf_addr = xt->dst_start;
-	else
-		hw->buf_addr = xt->src_start;
+	if (xt->dir != DMA_MEM_TO_DEV) {
+		if (chan->ext_addr) {
+			hw->buf_addr = lower_32_bits(xt->dst_start);
+			hw->buf_addr_msb = upper_32_bits(xt->dst_start);
+		} else {
+			hw->buf_addr = xt->dst_start;
+		}
+	} else {
+		if (chan->ext_addr) {
+			hw->buf_addr = lower_32_bits(xt->src_start);
+			hw->buf_addr_msb = upper_32_bits(xt->src_start);
+		} else {
+			hw->buf_addr = xt->src_start;
+		}
+	}
 
 	/* Insert the segment into the descriptor segments list. */
 	list_add_tail(&segment->node, &desc->segments);
@@ -1005,29 +1499,194 @@
 	return &desc->async_tx;
 
 error:
-	xilinx_vdma_free_tx_descriptor(chan, desc);
+	xilinx_dma_free_tx_descriptor(chan, desc);
 	return NULL;
 }
 
 /**
- * xilinx_vdma_terminate_all - Halt the channel and free descriptors
- * @chan: Driver specific VDMA Channel pointer
+ * xilinx_cdma_prep_memcpy - prepare descriptors for a memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: destination address
+ * @dma_src: source address
+ * @len: transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
  */
-static int xilinx_vdma_terminate_all(struct dma_chan *dchan)
+static struct dma_async_tx_descriptor *
+xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
+			dma_addr_t dma_src, size_t len, unsigned long flags)
 {
-	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_cdma_tx_segment *segment, *prev;
+	struct xilinx_cdma_desc_hw *hw;
+
+	if (!len || len > XILINX_DMA_MAX_TRANS_LEN)
+		return NULL;
+
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+	/* Allocate the link descriptor from DMA pool */
+	segment = xilinx_cdma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	hw = &segment->hw;
+	hw->control = len;
+	hw->src_addr = dma_src;
+	hw->dest_addr = dma_dst;
+
+	/* Fill the previous next descriptor with current */
+	prev = list_last_entry(&desc->segments,
+			       struct xilinx_cdma_tx_segment, node);
+	prev->hw.next_desc = segment->phys;
+
+	/* Insert the segment into the descriptor segments list. */
+	list_add_tail(&segment->node, &desc->segments);
+
+	prev = segment;
+
+	/* Link the last hardware descriptor with the first. */
+	segment = list_first_entry(&desc->segments,
+				struct xilinx_cdma_tx_segment, node);
+	desc->async_tx.phys = segment->phys;
+	prev->hw.next_desc = segment->phys;
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @sgl
+ * @direction: DMA direction
+ * @flags: transfer ack flags
+ * @context: APP words of the descriptor
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg(
+	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_axidma_tx_segment *segment = NULL, *prev = NULL;
+	u32 *app_w = (u32 *)context;
+	struct scatterlist *sg;
+	size_t copy;
+	size_t sg_used;
+	unsigned int i;
+
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+	/* Build transactions using information in the scatter gather list */
+	for_each_sg(sgl, sg, sg_len, i) {
+		sg_used = 0;
+
+		/* Loop until the entire scatterlist entry is used */
+		while (sg_used < sg_dma_len(sg)) {
+			struct xilinx_axidma_desc_hw *hw;
+
+			/* Get a free segment */
+			segment = xilinx_axidma_alloc_tx_segment(chan);
+			if (!segment)
+				goto error;
+
+			/*
+			 * Calculate the maximum number of bytes to transfer,
+			 * making sure it is less than the hw limit
+			 */
+			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+				     XILINX_DMA_MAX_TRANS_LEN);
+			hw = &segment->hw;
+
+			/* Fill in the descriptor */
+			hw->buf_addr = sg_dma_address(sg) + sg_used;
+
+			hw->control = copy;
+
+			if (chan->direction == DMA_MEM_TO_DEV) {
+				if (app_w)
+					memcpy(hw->app, app_w, sizeof(u32) *
+					       XILINX_DMA_NUM_APP_WORDS);
+			}
+
+			if (prev)
+				prev->hw.next_desc = segment->phys;
+
+			prev = segment;
+			sg_used += copy;
+
+			/*
+			 * Insert the segment into the descriptor segments
+			 * list.
+			 */
+			list_add_tail(&segment->node, &desc->segments);
+		}
+	}
+
+	segment = list_first_entry(&desc->segments,
+				   struct xilinx_axidma_tx_segment, node);
+	desc->async_tx.phys = segment->phys;
+	prev->hw.next_desc = segment->phys;
+
+	/* For DMA_MEM_TO_DEV, set SOP on the first segment, EOP on the last */
+	if (chan->direction == DMA_MEM_TO_DEV) {
+		segment->hw.control |= XILINX_DMA_BD_SOP;
+		segment = list_last_entry(&desc->segments,
+					  struct xilinx_axidma_tx_segment,
+					  node);
+		segment->hw.control |= XILINX_DMA_BD_EOP;
+	}
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_dma_terminate_all - Halt the channel and free descriptors
+ * @dchan: Driver specific DMA Channel pointer
+ */
+static int xilinx_dma_terminate_all(struct dma_chan *dchan)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 
 	/* Halt the DMA engine */
-	xilinx_vdma_halt(chan);
+	xilinx_dma_halt(chan);
 
 	/* Remove and free all of the descriptors in the lists */
-	xilinx_vdma_free_descriptors(chan);
+	xilinx_dma_free_descriptors(chan);
 
 	return 0;
 }
 
 /**
- * xilinx_vdma_channel_set_config - Configure VDMA channel
+ * xilinx_vdma_channel_set_config - Configure VDMA channel
  * Run-time configuration for Axi VDMA, supports:
  * . halt the channel
  * . configure interrupt coalescing and inter-packet delay threshold
@@ -1042,13 +1701,13 @@
 int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
 					struct xilinx_vdma_config *cfg)
 {
-	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	u32 dmacr;
 
 	if (cfg->reset)
-		return xilinx_vdma_chan_reset(chan);
+		return xilinx_dma_chan_reset(chan);
 
-	dmacr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+	dmacr = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
 
 	chan->config.frm_dly = cfg->frm_dly;
 	chan->config.park = cfg->park;
@@ -1058,8 +1717,8 @@
 	chan->config.master = cfg->master;
 
 	if (cfg->gen_lock && chan->genlock) {
-		dmacr |= XILINX_VDMA_DMACR_GENLOCK_EN;
-		dmacr |= cfg->master << XILINX_VDMA_DMACR_MASTER_SHIFT;
+		dmacr |= XILINX_DMA_DMACR_GENLOCK_EN;
+		dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT;
 	}
 
 	chan->config.frm_cnt_en = cfg->frm_cnt_en;
@@ -1071,21 +1730,21 @@
 	chan->config.coalesc = cfg->coalesc;
 	chan->config.delay = cfg->delay;
 
-	if (cfg->coalesc <= XILINX_VDMA_DMACR_FRAME_COUNT_MAX) {
-		dmacr |= cfg->coalesc << XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT;
+	if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) {
+		dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT;
 		chan->config.coalesc = cfg->coalesc;
 	}
 
-	if (cfg->delay <= XILINX_VDMA_DMACR_DELAY_MAX) {
-		dmacr |= cfg->delay << XILINX_VDMA_DMACR_DELAY_SHIFT;
+	if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) {
+		dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT;
 		chan->config.delay = cfg->delay;
 	}
 
 	/* FSync Source selection */
-	dmacr &= ~XILINX_VDMA_DMACR_FSYNCSRC_MASK;
-	dmacr |= cfg->ext_fsync << XILINX_VDMA_DMACR_FSYNCSRC_SHIFT;
+	dmacr &= ~XILINX_DMA_DMACR_FSYNCSRC_MASK;
+	dmacr |= cfg->ext_fsync << XILINX_DMA_DMACR_FSYNCSRC_SHIFT;
 
-	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, dmacr);
+	dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, dmacr);
 
 	return 0;
 }
@@ -1096,14 +1755,14 @@
  */
 
 /**
- * xilinx_vdma_chan_remove - Per Channel remove function
- * @chan: Driver specific VDMA channel
+ * xilinx_dma_chan_remove - Per Channel remove function
+ * @chan: Driver specific DMA channel
  */
-static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan)
+static void xilinx_dma_chan_remove(struct xilinx_dma_chan *chan)
 {
 	/* Disable all interrupts */
-	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR,
-		      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+	dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR,
+		      XILINX_DMA_DMAXR_ALL_IRQ_MASK);
 
 	if (chan->irq > 0)
 		free_irq(chan->irq, chan);
@@ -1113,8 +1772,197 @@
 	list_del(&chan->common.device_node);
 }
 
+static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+			    struct clk **tx_clk, struct clk **rx_clk,
+			    struct clk **sg_clk, struct clk **tmp_clk)
+{
+	int err;	/* signed errno value, so it is printed with %d */
+
+	*tmp_clk = NULL;	/* fifth clock slot is not used here */
+
+	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+	if (IS_ERR(*axi_clk)) {
+		err = PTR_ERR(*axi_clk);
+		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
+		return err;
+	}
+
+	*tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
+	if (IS_ERR(*tx_clk))
+		*tx_clk = NULL;	/* optional: a failed lookup is not fatal */
+
+	*rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk");
+	if (IS_ERR(*rx_clk))
+		*rx_clk = NULL;
+
+	*sg_clk = devm_clk_get(&pdev->dev, "m_axi_sg_aclk");
+	if (IS_ERR(*sg_clk))
+		*sg_clk = NULL;
+
+	err = clk_prepare_enable(*axi_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*tx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
+		goto err_disable_axiclk;
+	}
+
+	err = clk_prepare_enable(*rx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
+		goto err_disable_txclk;
+	}
+
+	err = clk_prepare_enable(*sg_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable sg_clk (%d)\n", err);
+		goto err_disable_rxclk;
+	}
+
+	return 0;
+
+err_disable_rxclk:
+	clk_disable_unprepare(*rx_clk);
+err_disable_txclk:
+	clk_disable_unprepare(*tx_clk);
+err_disable_axiclk:
+	clk_disable_unprepare(*axi_clk);
+
+	return err;
+}
+
+static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+			    struct clk **dev_clk, struct clk **tmp_clk,
+			    struct clk **tmp1_clk, struct clk **tmp2_clk)
+{
+	int err;	/* signed errno value, so it is printed with %d */
+
+	*tmp_clk = NULL;	/* the three spare clock slots stay unset */
+	*tmp1_clk = NULL;
+	*tmp2_clk = NULL;
+
+	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+	if (IS_ERR(*axi_clk)) {
+		err = PTR_ERR(*axi_clk);
+		dev_err(&pdev->dev, "failed to get axi_clk (%d)\n", err);
+		return err;
+	}
+
+	*dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk");
+	if (IS_ERR(*dev_clk)) {
+		err = PTR_ERR(*dev_clk);
+		dev_err(&pdev->dev, "failed to get dev_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*axi_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*dev_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable dev_clk (%d)\n", err);
+		goto err_disable_axiclk;
+	}
+
+	return 0;
+
+err_disable_axiclk:
+	clk_disable_unprepare(*axi_clk);
+
+	return err;
+}
+
+static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+			    struct clk **tx_clk, struct clk **txs_clk,
+			    struct clk **rx_clk, struct clk **rxs_clk)
+{
+	int err;	/* signed errno value, so it is printed with %d */
+
+	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+	if (IS_ERR(*axi_clk)) {
+		err = PTR_ERR(*axi_clk);
+		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
+		return err;
+	}
+
+	*tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
+	if (IS_ERR(*tx_clk))
+		*tx_clk = NULL;	/* optional: a failed lookup is not fatal */
+
+	*txs_clk = devm_clk_get(&pdev->dev, "m_axis_mm2s_aclk");
+	if (IS_ERR(*txs_clk))
+		*txs_clk = NULL;
+
+	*rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk");
+	if (IS_ERR(*rx_clk))
+		*rx_clk = NULL;
+
+	*rxs_clk = devm_clk_get(&pdev->dev, "s_axis_s2mm_aclk");
+	if (IS_ERR(*rxs_clk))
+		*rxs_clk = NULL;
+
+	err = clk_prepare_enable(*axi_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*tx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
+		goto err_disable_axiclk;
+	}
+
+	err = clk_prepare_enable(*txs_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable txs_clk (%d)\n", err);
+		goto err_disable_txclk;
+	}
+
+	err = clk_prepare_enable(*rx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
+		goto err_disable_txsclk;
+	}
+
+	err = clk_prepare_enable(*rxs_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable rxs_clk (%d)\n", err);
+		goto err_disable_rxclk;
+	}
+
+	return 0;
+
+err_disable_rxclk:
+	clk_disable_unprepare(*rx_clk);
+err_disable_txsclk:
+	clk_disable_unprepare(*txs_clk);
+err_disable_txclk:
+	clk_disable_unprepare(*tx_clk);
+err_disable_axiclk:
+	clk_disable_unprepare(*axi_clk);
+
+	return err;
+}
+
+static void xdma_disable_allclks(struct xilinx_dma_device *xdev)
+{
+	clk_disable_unprepare(xdev->rxs_clk);
+	clk_disable_unprepare(xdev->rx_clk);
+	clk_disable_unprepare(xdev->txs_clk);
+	clk_disable_unprepare(xdev->tx_clk);
+	clk_disable_unprepare(xdev->axi_clk);
+}
+
 /**
- * xilinx_vdma_chan_probe - Per Channel Probing
+ * xilinx_dma_chan_probe - Per Channel Probing
  * It get channel features from the device tree entry and
  * initialize special channel handling routines
  *
@@ -1123,10 +1971,10 @@
  *
  * Return: '0' on success and failure value on error
  */
-static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev,
+static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 				  struct device_node *node)
 {
-	struct xilinx_vdma_chan *chan;
+	struct xilinx_dma_chan *chan;
 	bool has_dre = false;
 	u32 value, width;
 	int err;
@@ -1140,6 +1988,7 @@
 	chan->xdev = xdev;
 	chan->has_sg = xdev->has_sg;
 	chan->desc_pendingcount = 0x0;
+	chan->ext_addr = xdev->ext_addr;
 
 	spin_lock_init(&chan->lock);
 	INIT_LIST_HEAD(&chan->pending_list);
@@ -1169,23 +2018,27 @@
 		chan->direction = DMA_MEM_TO_DEV;
 		chan->id = 0;
 
-		chan->ctrl_offset = XILINX_VDMA_MM2S_CTRL_OFFSET;
-		chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET;
+		chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET;
+		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+			chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET;
 
-		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
-		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_MM2S)
-			chan->flush_on_fsync = true;
+			if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH ||
+			    xdev->flush_on_fsync == XILINX_DMA_FLUSH_MM2S)
+				chan->flush_on_fsync = true;
+		}
 	} else if (of_device_is_compatible(node,
 					    "xlnx,axi-vdma-s2mm-channel")) {
 		chan->direction = DMA_DEV_TO_MEM;
 		chan->id = 1;
 
-		chan->ctrl_offset = XILINX_VDMA_S2MM_CTRL_OFFSET;
-		chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
+		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
+		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+			chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
 
-		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
-		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_S2MM)
-			chan->flush_on_fsync = true;
+			if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH ||
+			    xdev->flush_on_fsync == XILINX_DMA_FLUSH_S2MM)
+				chan->flush_on_fsync = true;
+		}
 	} else {
 		dev_err(xdev->dev, "Invalid channel compatible node\n");
 		return -EINVAL;
@@ -1193,15 +2046,22 @@
 
 	/* Request the interrupt */
 	chan->irq = irq_of_parse_and_map(node, 0);
-	err = request_irq(chan->irq, xilinx_vdma_irq_handler, IRQF_SHARED,
-			  "xilinx-vdma-controller", chan);
+	err = request_irq(chan->irq, xilinx_dma_irq_handler, IRQF_SHARED,
+			  "xilinx-dma-controller", chan);
 	if (err) {
 		dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
 		return err;
 	}
 
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+		chan->start_transfer = xilinx_dma_start_transfer;
+	else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA)
+		chan->start_transfer = xilinx_cdma_start_transfer;
+	else
+		chan->start_transfer = xilinx_vdma_start_transfer;
+
 	/* Initialize the tasklet */
-	tasklet_init(&chan->tasklet, xilinx_vdma_do_tasklet,
+	tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet,
 			(unsigned long)chan);
 
 	/*
@@ -1214,7 +2074,7 @@
 	xdev->chan[chan->id] = chan;
 
 	/* Reset the channel */
-	err = xilinx_vdma_chan_reset(chan);
+	err = xilinx_dma_chan_reset(chan);
 	if (err < 0) {
 		dev_err(xdev->dev, "Reset channel failed\n");
 		return err;
@@ -1233,28 +2093,54 @@
 static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
 						struct of_dma *ofdma)
 {
-	struct xilinx_vdma_device *xdev = ofdma->of_dma_data;
+	struct xilinx_dma_device *xdev = ofdma->of_dma_data;
 	int chan_id = dma_spec->args[0];
 
-	if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id])
+	if (chan_id >= XILINX_DMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id])
 		return NULL;
 
 	return dma_get_slave_channel(&xdev->chan[chan_id]->common);
 }
 
+static const struct xilinx_dma_config axidma_config = {
+	.dmatype = XDMA_TYPE_AXIDMA,
+	.clk_init = axidma_clk_init,
+};
+
+static const struct xilinx_dma_config axicdma_config = {
+	.dmatype = XDMA_TYPE_CDMA,
+	.clk_init = axicdma_clk_init,
+};
+
+static const struct xilinx_dma_config axivdma_config = {
+	.dmatype = XDMA_TYPE_VDMA,
+	.clk_init = axivdma_clk_init,
+};
+
+static const struct of_device_id xilinx_dma_of_ids[] = {
+	{ .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config },
+	{ .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config },
+	{ .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config },
+	{}
+};
+MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids);
+
 /**
- * xilinx_vdma_probe - Driver probe function
+ * xilinx_dma_probe - Driver probe function
  * @pdev: Pointer to the platform_device structure
  *
  * Return: '0' on success and failure value on error
  */
-static int xilinx_vdma_probe(struct platform_device *pdev)
+static int xilinx_dma_probe(struct platform_device *pdev)
 {
+	int (*clk_init)(struct platform_device *, struct clk **, struct clk **,
+			struct clk **, struct clk **, struct clk **)
+					= axivdma_clk_init;
 	struct device_node *node = pdev->dev.of_node;
-	struct xilinx_vdma_device *xdev;
-	struct device_node *child;
+	struct xilinx_dma_device *xdev;
+	struct device_node *child, *np = pdev->dev.of_node;
 	struct resource *io;
-	u32 num_frames;
+	u32 num_frames, addr_width;
 	int i, err;
 
 	/* Allocate and initialize the DMA engine structure */
@@ -1263,6 +2149,20 @@
 		return -ENOMEM;
 
 	xdev->dev = &pdev->dev;
+	if (np) {
+		const struct of_device_id *match;
+
+		match = of_match_node(xilinx_dma_of_ids, np);
+		if (match && match->data) {
+			xdev->dma_config = match->data;
+			clk_init = xdev->dma_config->clk_init;
+		}
+	}
+
+	err = clk_init(pdev, &xdev->axi_clk, &xdev->tx_clk, &xdev->txs_clk,
+		       &xdev->rx_clk, &xdev->rxs_clk);
+	if (err)
+		return err;
 
 	/* Request and map I/O memory */
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1273,46 +2173,77 @@
 	/* Retrieve the DMA engine properties from the device tree */
 	xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg");
 
-	err = of_property_read_u32(node, "xlnx,num-fstores", &num_frames);
-	if (err < 0) {
-		dev_err(xdev->dev, "missing xlnx,num-fstores property\n");
-		return err;
+	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		err = of_property_read_u32(node, "xlnx,num-fstores",
+					   &num_frames);
+		if (err < 0) {
+			dev_err(xdev->dev,
+				"missing xlnx,num-fstores property\n");
+			return err;
+		}
+
+		err = of_property_read_u32(node, "xlnx,flush-fsync",
+					   &xdev->flush_on_fsync);
+		if (err < 0)
+			dev_warn(xdev->dev,
+				 "missing xlnx,flush-fsync property\n");
 	}
 
-	err = of_property_read_u32(node, "xlnx,flush-fsync",
-					&xdev->flush_on_fsync);
+	err = of_property_read_u32(node, "xlnx,addrwidth", &addr_width);
 	if (err < 0)
-		dev_warn(xdev->dev, "missing xlnx,flush-fsync property\n");
+		dev_warn(xdev->dev, "missing xlnx,addrwidth property\n");
+
+	if (addr_width > 32)
+		xdev->ext_addr = true;
+	else
+		xdev->ext_addr = false;
+
+	/* Set the dma mask bits */
+	dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width));
 
 	/* Initialize the DMA engine */
 	xdev->common.dev = &pdev->dev;
 
 	INIT_LIST_HEAD(&xdev->common.channels);
-	dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
-	dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);
+	if (!(xdev->dma_config->dmatype == XDMA_TYPE_CDMA)) {
+		dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
+		dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);
+	}
 
 	xdev->common.device_alloc_chan_resources =
-				xilinx_vdma_alloc_chan_resources;
+				xilinx_dma_alloc_chan_resources;
 	xdev->common.device_free_chan_resources =
-				xilinx_vdma_free_chan_resources;
-	xdev->common.device_prep_interleaved_dma =
+				xilinx_dma_free_chan_resources;
+	xdev->common.device_terminate_all = xilinx_dma_terminate_all;
+	xdev->common.device_tx_status = xilinx_dma_tx_status;
+	xdev->common.device_issue_pending = xilinx_dma_issue_pending;
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
+		/* Residue calculation is supported only by AXI DMA */
+		xdev->common.residue_granularity =
+					  DMA_RESIDUE_GRANULARITY_SEGMENT;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
+		xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
+	} else {
+		xdev->common.device_prep_interleaved_dma =
 				xilinx_vdma_dma_prep_interleaved;
-	xdev->common.device_terminate_all = xilinx_vdma_terminate_all;
-	xdev->common.device_tx_status = xilinx_vdma_tx_status;
-	xdev->common.device_issue_pending = xilinx_vdma_issue_pending;
+	}
 
 	platform_set_drvdata(pdev, xdev);
 
 	/* Initialize the channels */
 	for_each_child_of_node(node, child) {
-		err = xilinx_vdma_chan_probe(xdev, child);
+		err = xilinx_dma_chan_probe(xdev, child);
 		if (err < 0)
-			goto error;
+			goto disable_clks;
 	}
 
-	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
-		if (xdev->chan[i])
-			xdev->chan[i]->num_frms = num_frames;
+	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++)
+			if (xdev->chan[i])
+				xdev->chan[i]->num_frms = num_frames;
+	}
 
 	/* Register the DMA engine with the core */
 	dma_async_device_register(&xdev->common);
@@ -1329,49 +2260,47 @@
 
 	return 0;
 
+disable_clks:
+	xdma_disable_allclks(xdev);
 error:
-	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+	for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++)
 		if (xdev->chan[i])
-			xilinx_vdma_chan_remove(xdev->chan[i]);
+			xilinx_dma_chan_remove(xdev->chan[i]);
 
 	return err;
 }
 
 /**
- * xilinx_vdma_remove - Driver remove function
+ * xilinx_dma_remove - Driver remove function
  * @pdev: Pointer to the platform_device structure
  *
  * Return: Always '0'
  */
-static int xilinx_vdma_remove(struct platform_device *pdev)
+static int xilinx_dma_remove(struct platform_device *pdev)
 {
-	struct xilinx_vdma_device *xdev = platform_get_drvdata(pdev);
+	struct xilinx_dma_device *xdev = platform_get_drvdata(pdev);
 	int i;
 
 	of_dma_controller_free(pdev->dev.of_node);
 
 	dma_async_device_unregister(&xdev->common);
 
-	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+	for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++)
 		if (xdev->chan[i])
-			xilinx_vdma_chan_remove(xdev->chan[i]);
+			xilinx_dma_chan_remove(xdev->chan[i]);
+
+	xdma_disable_allclks(xdev);
 
 	return 0;
 }
 
-static const struct of_device_id xilinx_vdma_of_ids[] = {
-	{ .compatible = "xlnx,axi-vdma-1.00.a",},
-	{}
-};
-MODULE_DEVICE_TABLE(of, xilinx_vdma_of_ids);
-
 static struct platform_driver xilinx_vdma_driver = {
 	.driver = {
 		.name = "xilinx-vdma",
-		.of_match_table = xilinx_vdma_of_ids,
+		.of_match_table = xilinx_dma_of_ids,
 	},
-	.probe = xilinx_vdma_probe,
-	.remove = xilinx_vdma_remove,
+	.probe = xilinx_dma_probe,
+	.remove = xilinx_dma_remove,
 };
 
 module_platform_driver(xilinx_vdma_driver);
diff --git a/drivers/firmware/broadcom/Kconfig b/drivers/firmware/broadcom/Kconfig
index 6bed1199..3c7e5b7 100644
--- a/drivers/firmware/broadcom/Kconfig
+++ b/drivers/firmware/broadcom/Kconfig
@@ -9,3 +9,14 @@
 	  This driver provides an easy way to get value of requested parameter.
 	  It simply reads content of NVRAM and parses it. It doesn't control any
 	  hardware part itself.
+
+config BCM47XX_SPROM
+	bool "Broadcom SPROM driver"
+	depends on BCM47XX_NVRAM
+	help
+	  Broadcom devices store configuration data in SPROM. Accessing it is
+	  specific to the bus host type, e.g. PCI(e) devices have it mapped in
+	  a PCI BAR.
+	  On SoC devices the SPROM content is stored on a flash used by the
+	  CFE bootloader firmware. This driver provides a way for the ssb and
+	  bcma drivers to read the SPROM on a SoC.
diff --git a/drivers/firmware/broadcom/Makefile b/drivers/firmware/broadcom/Makefile
index d0e6835..f93efc4 100644
--- a/drivers/firmware/broadcom/Makefile
+++ b/drivers/firmware/broadcom/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_BCM47XX_NVRAM)		+= bcm47xx_nvram.o
+obj-$(CONFIG_BCM47XX_SPROM)		+= bcm47xx_sprom.o
diff --git a/arch/mips/bcm47xx/sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c
similarity index 97%
rename from arch/mips/bcm47xx/sprom.c
rename to drivers/firmware/broadcom/bcm47xx_sprom.c
index ca7ad13..b6eb875 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/drivers/firmware/broadcom/bcm47xx_sprom.c
@@ -26,9 +26,11 @@
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <bcm47xx.h>
-#include <linux/if_ether.h>
+#include <linux/bcm47xx_nvram.h>
+#include <linux/bcma/bcma.h>
 #include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/ssb/ssb.h>
 
 static void create_key(const char *prefix, const char *postfix,
 		       const char *name, char *buf, int len)
@@ -599,7 +601,7 @@
 	bcm47xx_sprom_fill_auto(sprom, prefix, fallback);
 }
 
-#if defined(CONFIG_BCM47XX_SSB)
+#if IS_BUILTIN(CONFIG_SSB) && IS_ENABLED(CONFIG_SSB_SPROM)
 static int bcm47xx_get_sprom_ssb(struct ssb_bus *bus, struct ssb_sprom *out)
 {
 	char prefix[10];
@@ -622,7 +624,7 @@
 }
 #endif
 
-#if defined(CONFIG_BCM47XX_BCMA)
+#if IS_BUILTIN(CONFIG_BCMA)
 /*
  * Having many NVRAM entries for PCI devices led to repeating prefixes like
  * pci/1/1/ all the time and wasting flash space. So at some point Broadcom
@@ -706,19 +708,30 @@
 }
 #endif
 
+static unsigned int bcm47xx_sprom_registered;
+
 /*
  * On bcm47xx we need to register SPROM fallback handler very early, so we can't
  * use anything like platform device / driver for this.
  */
-void bcm47xx_sprom_register_fallbacks(void)
+int bcm47xx_sprom_register_fallbacks(void)
 {
-#if defined(CONFIG_BCM47XX_SSB)
+	if (bcm47xx_sprom_registered)
+		return 0;
+
+#if IS_BUILTIN(CONFIG_SSB) && IS_ENABLED(CONFIG_SSB_SPROM)
 	if (ssb_arch_register_fallback_sprom(&bcm47xx_get_sprom_ssb))
 		pr_warn("Failed to register ssb SPROM handler\n");
 #endif
 
-#if defined(CONFIG_BCM47XX_BCMA)
+#if IS_BUILTIN(CONFIG_BCMA)
 	if (bcma_arch_register_fallback_sprom(&bcm47xx_get_sprom_bcma))
 		pr_warn("Failed to register bcma SPROM handler\n");
 #endif
+
+	bcm47xx_sprom_registered = 1;
+
+	return 0;
 }
+
+fs_initcall(bcm47xx_sprom_register_fallbacks);
diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index 9d233bb..a8e89df 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -617,7 +617,7 @@
 };
 EXPORT_SYMBOL(i2c_bit_algo);
 
-const struct i2c_adapter_quirks i2c_bit_quirk_no_clk_stretch = {
+static const struct i2c_adapter_quirks i2c_bit_quirk_no_clk_stretch = {
 	.flags = I2C_AQ_NO_CLK_STRETCH,
 };
 
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 0967e1a..2dd40dd 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -663,7 +663,7 @@
 
 config I2C_MV64XXX
 	tristate "Marvell mv64xxx I2C Controller"
-	depends on MV64X60 || PLAT_ORION || ARCH_SUNXI
+	depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU
 	help
 	  If you say yes to this option, support will be included for the
 	  built-in I2C interface on the Marvell 64xxx line of host bridges.
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index b9f0fff..19c8438 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -267,7 +267,7 @@
 	iproc_i2c->msg = msg;
 
 	/* format and load slave address into the TX FIFO */
-	addr = msg->addr << 1 | (msg->flags & I2C_M_RD ? 1 : 0);
+	addr = i2c_8bit_addr_from_msg(msg);
 	writel(addr, iproc_i2c->base + M_TX_OFFSET);
 
 	/*
diff --git a/drivers/i2c/busses/i2c-bcm-kona.c b/drivers/i2c/busses/i2c-bcm-kona.c
index 2c9d9b1..ac9f476 100644
--- a/drivers/i2c/busses/i2c-bcm-kona.c
+++ b/drivers/i2c/busses/i2c-bcm-kona.c
@@ -501,10 +501,7 @@
 				return -EREMOTEIO;
 		}
 	} else {
-		addr = msg->addr << 1;
-
-		if (msg->flags & I2C_M_RD)
-			addr |= 1;
+		addr = i2c_8bit_addr_from_msg(msg);
 
 		if (bcm_kona_i2c_write_byte(dev, addr, 0) < 0)
 			return -EREMOTEIO;
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 4a45408..6a8cfc1 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -446,9 +446,7 @@
 
 		}
 	} else {
-		addr = msg->addr << 1;
-		if (msg->flags & I2C_M_RD)
-			addr |= 1;
+		addr = i2c_8bit_addr_from_msg(msg);
 
 		bsc_writel(dev, addr, chip_address);
 	}
diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
index b167ab2..ee57c1e 100644
--- a/drivers/i2c/busses/i2c-cpm.c
+++ b/drivers/i2c/busses/i2c-cpm.c
@@ -197,9 +197,7 @@
 	tbdf = cpm->tbase + tx;
 	rbdf = cpm->rbase + rx;
 
-	addr = pmsg->addr << 1;
-	if (pmsg->flags & I2C_M_RD)
-		addr |= 1;
+	addr = i2c_8bit_addr_from_msg(pmsg);
 
 	tb = cpm->txbuf[tx];
 	rb = cpm->rxbuf[rx];
diff --git a/drivers/i2c/busses/i2c-dln2.c b/drivers/i2c/busses/i2c-dln2.c
index 1600edd..f2eb4f7 100644
--- a/drivers/i2c/busses/i2c-dln2.c
+++ b/drivers/i2c/busses/i2c-dln2.c
@@ -19,6 +19,7 @@
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/dln2.h>
+#include <linux/acpi.h>
 
 #define DLN2_I2C_MODULE_ID		0x03
 #define DLN2_I2C_CMD(cmd)		DLN2_CMD(cmd, DLN2_I2C_MODULE_ID)
@@ -210,6 +211,7 @@
 	dln2->adapter.algo = &dln2_i2c_usb_algorithm;
 	dln2->adapter.quirks = &dln2_i2c_quirks;
 	dln2->adapter.dev.parent = dev;
+	ACPI_COMPANION_SET(&dln2->adapter.dev, ACPI_COMPANION(&pdev->dev));
 	dln2->adapter.dev.of_node = dev->of_node;
 	i2c_set_adapdata(&dln2->adapter, dln2);
 	snprintf(dln2->adapter.name, sizeof(dln2->adapter.name), "%s-%s-%d",
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index f54ece8..c0e3ada 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -861,14 +861,8 @@
 #endif
 
 static const struct dev_pm_ops exynos5_i2c_dev_pm_ops = {
-#ifdef CONFIG_PM_SLEEP
-	.suspend_noirq = exynos5_i2c_suspend_noirq,
-	.resume_noirq = exynos5_i2c_resume_noirq,
-	.freeze_noirq = exynos5_i2c_suspend_noirq,
-	.thaw_noirq = exynos5_i2c_resume_noirq,
-	.poweroff_noirq = exynos5_i2c_suspend_noirq,
-	.restore_noirq = exynos5_i2c_resume_noirq,
-#endif
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(exynos5_i2c_suspend_noirq,
+				      exynos5_i2c_resume_noirq)
 };
 
 static struct platform_driver exynos5_i2c_driver = {
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 585a3b7..64b1208b 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -94,6 +94,7 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/itco_wdt.h>
+#include <linux/pm_runtime.h>
 
 #if (defined CONFIG_I2C_MUX_GPIO || defined CONFIG_I2C_MUX_GPIO_MODULE) && \
 		defined CONFIG_DMI
@@ -714,9 +715,11 @@
 {
 	int hwpec;
 	int block = 0;
-	int ret, xact = 0;
+	int ret = 0, xact = 0;
 	struct i801_priv *priv = i2c_get_adapdata(adap);
 
+	pm_runtime_get_sync(&priv->pci_dev->dev);
+
 	hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
 		&& size != I2C_SMBUS_QUICK
 		&& size != I2C_SMBUS_I2C_BLOCK_DATA;
@@ -773,7 +776,8 @@
 	default:
 		dev_err(&priv->pci_dev->dev, "Unsupported transaction %d\n",
 			size);
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 
 	if (hwpec)	/* enable/disable hardware PEC */
@@ -796,11 +800,11 @@
 		       ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));
 
 	if (block)
-		return ret;
+		goto out;
 	if (ret)
-		return ret;
+		goto out;
 	if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
-		return 0;
+		goto out;
 
 	switch (xact & 0x7f) {
 	case I801_BYTE:	/* Result put in SMBHSTDAT0 */
@@ -812,7 +816,11 @@
 			     (inb_p(SMBHSTDAT1(priv)) << 8);
 		break;
 	}
-	return 0;
+
+out:
+	pm_runtime_mark_last_busy(&priv->pci_dev->dev);
+	pm_runtime_put_autosuspend(&priv->pci_dev->dev);
+	return ret;
 }
 
 
@@ -1413,6 +1421,11 @@
 
 	pci_set_drvdata(dev, priv);
 
+	pm_runtime_set_autosuspend_delay(&dev->dev, 1000);
+	pm_runtime_use_autosuspend(&dev->dev);
+	pm_runtime_put_autosuspend(&dev->dev);
+	pm_runtime_allow(&dev->dev);
+
 	return 0;
 }
 
@@ -1420,6 +1433,9 @@
 {
 	struct i801_priv *priv = pci_get_drvdata(dev);
 
+	pm_runtime_forbid(&dev->dev);
+	pm_runtime_get_noresume(&dev->dev);
+
 	i801_del_mux(priv);
 	i2c_del_adapter(&priv->adapter);
 	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
@@ -1433,34 +1449,32 @@
 }
 
 #ifdef CONFIG_PM
-static int i801_suspend(struct pci_dev *dev, pm_message_t mesg)
+static int i801_suspend(struct device *dev)
 {
-	struct i801_priv *priv = pci_get_drvdata(dev);
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct i801_priv *priv = pci_get_drvdata(pci_dev);
 
-	pci_save_state(dev);
-	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
-	pci_set_power_state(dev, pci_choose_state(dev, mesg));
+	pci_write_config_byte(pci_dev, SMBHSTCFG, priv->original_hstcfg);
 	return 0;
 }
 
-static int i801_resume(struct pci_dev *dev)
+static int i801_resume(struct device *dev)
 {
-	pci_set_power_state(dev, PCI_D0);
-	pci_restore_state(dev);
 	return 0;
 }
-#else
-#define i801_suspend NULL
-#define i801_resume NULL
 #endif
 
+static UNIVERSAL_DEV_PM_OPS(i801_pm_ops, i801_suspend,
+			    i801_resume, NULL);
+
 static struct pci_driver i801_driver = {
 	.name		= "i801_smbus",
 	.id_table	= i801_ids,
 	.probe		= i801_probe,
 	.remove		= i801_remove,
-	.suspend	= i801_suspend,
-	.resume		= i801_resume,
+	.driver		= {
+		.pm	= &i801_pm_ops,
+	},
 };
 
 static int __init i2c_i801_init(void)
diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
index b6c0803..cdaa7be 100644
--- a/drivers/i2c/busses/i2c-ibm_iic.c
+++ b/drivers/i2c/busses/i2c-ibm_iic.c
@@ -269,7 +269,7 @@
 	ndelay(t->hd_sta);
 
 	/* Send address */
-	v = (u8)((p->addr << 1) | ((p->flags & I2C_M_RD) ? 1 : 0));
+	v = i2c_8bit_addr_from_msg(p);
 	for (i = 0, mask = 0x80; i < 8; ++i, mask >>= 1){
 		out_8(&iic->directcntl, sda);
 		ndelay(t->low / 2);
diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c
index 379ef9c..ea20425b 100644
--- a/drivers/i2c/busses/i2c-img-scb.c
+++ b/drivers/i2c/busses/i2c-img-scb.c
@@ -751,9 +751,7 @@
 	switch (i2c->at_cur_cmd) {
 	case CMD_GEN_START:
 		next_cmd = CMD_GEN_DATA;
-		next_data = (i2c->msg.addr << 1);
-		if (i2c->msg.flags & I2C_M_RD)
-			next_data |= 0x1;
+		next_data = i2c_8bit_addr_from_msg(&i2c->msg);
 		break;
 	case CMD_GEN_DATA:
 		if (i2c->line_status & LINESTAT_INPUT_HELD_V)
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 1ca7ef2..1844bc9 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -525,7 +525,7 @@
 	imx_i2c_write_reg(i2c_imx->hwdata->i2cr_ien_opcode, i2c_imx, IMX_I2C_I2CR);
 
 	/* Wait controller to be stable */
-	udelay(50);
+	usleep_range(50, 150);
 
 	/* Start I2C transaction */
 	temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c
index 72d6161..85cbe4b 100644
--- a/drivers/i2c/busses/i2c-iop3xx.c
+++ b/drivers/i2c/busses/i2c-iop3xx.c
@@ -50,10 +50,7 @@
 {
 	unsigned char addr;
 
-	addr = (msg->addr << 1);
-
-	if (msg->flags & I2C_M_RD)
-		addr |= 1;
+	addr = i2c_8bit_addr_from_msg(msg);
 
 	return addr;
 }
diff --git a/drivers/i2c/busses/i2c-lpc2k.c b/drivers/i2c/busses/i2c-lpc2k.c
index 8560a13..586a152 100644
--- a/drivers/i2c/busses/i2c-lpc2k.c
+++ b/drivers/i2c/busses/i2c-lpc2k.c
@@ -133,9 +133,7 @@
 	case M_START:
 	case M_REPSTART:
 		/* Start bit was just sent out, send out addr and dir */
-		data = i2c->msg->addr << 1;
-		if (i2c->msg->flags & I2C_M_RD)
-			data |= 1;
+		data = i2c_8bit_addr_from_msg(i2c->msg);
 
 		writel(data, i2c->base + LPC24XX_I2DAT);
 		writel(LPC24XX_STA, i2c->base + LPC24XX_I2CONCLR);
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index 453358b..d9373e6 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -413,10 +413,7 @@
 	else
 		writew(I2C_FS_START_CON, i2c->base + OFFSET_EXT_CONF);
 
-	addr_reg = msgs->addr << 1;
-	if (i2c->op == I2C_MASTER_RD)
-		addr_reg |= 0x1;
-
+	addr_reg = i2c_8bit_addr_from_msg(msgs);
 	writew(addr_reg, i2c->base + OFFSET_SLAVE_ADDR);
 
 	/* Clear interrupt status */
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 43207f5..b4dec08 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -134,9 +134,7 @@
 	int			rc;
 	u32			freq_m;
 	u32			freq_n;
-#if defined(CONFIG_HAVE_CLK)
 	struct clk              *clk;
-#endif
 	wait_queue_head_t	waitq;
 	spinlock_t		lock;
 	struct i2c_msg		*msg;
@@ -757,7 +755,6 @@
 MODULE_DEVICE_TABLE(of, mv64xxx_i2c_of_match_table);
 
 #ifdef CONFIG_OF
-#ifdef CONFIG_HAVE_CLK
 static int
 mv64xxx_calc_freq(struct mv64xxx_i2c_data *drv_data,
 		  const int tclk, const int n, const int m)
@@ -791,25 +788,20 @@
 		return false;
 	return true;
 }
-#endif /* CONFIG_HAVE_CLK */
 
 static int
 mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 		  struct device *dev)
 {
-	/* CLK is mandatory when using DT to describe the i2c bus. We
-	 * need to know tclk in order to calculate bus clock
-	 * factors.
-	 */
-#if !defined(CONFIG_HAVE_CLK)
-	/* Have OF but no CLK */
-	return -ENODEV;
-#else
 	const struct of_device_id *device;
 	struct device_node *np = dev->of_node;
 	u32 bus_freq, tclk;
 	int rc = 0;
 
+	/* CLK is mandatory when using DT to describe the i2c bus. We
+	 * need to know tclk in order to calculate bus clock
+	 * factors.
+	 */
 	if (IS_ERR(drv_data->clk)) {
 		rc = -ENODEV;
 		goto out;
@@ -869,7 +861,6 @@
 
 out:
 	return rc;
-#endif
 }
 #else /* CONFIG_OF */
 static int
@@ -907,14 +898,13 @@
 	init_waitqueue_head(&drv_data->waitq);
 	spin_lock_init(&drv_data->lock);
 
-#if defined(CONFIG_HAVE_CLK)
 	/* Not all platforms have a clk */
 	drv_data->clk = devm_clk_get(&pd->dev, NULL);
-	if (!IS_ERR(drv_data->clk)) {
-		clk_prepare(drv_data->clk);
-		clk_enable(drv_data->clk);
-	}
-#endif
+	if (IS_ERR(drv_data->clk) && PTR_ERR(drv_data->clk) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+	if (!IS_ERR(drv_data->clk))
+		clk_prepare_enable(drv_data->clk);
+
 	if (pdata) {
 		drv_data->freq_m = pdata->freq_m;
 		drv_data->freq_n = pdata->freq_n;
@@ -964,13 +954,10 @@
 	if (!IS_ERR_OR_NULL(drv_data->rstc))
 		reset_control_assert(drv_data->rstc);
 exit_clk:
-#if defined(CONFIG_HAVE_CLK)
 	/* Not all platforms have a clk */
-	if (!IS_ERR(drv_data->clk)) {
-		clk_disable(drv_data->clk);
-		clk_unprepare(drv_data->clk);
-	}
-#endif
+	if (!IS_ERR(drv_data->clk))
+		clk_disable_unprepare(drv_data->clk);
+
 	return rc;
 }
 
@@ -983,13 +970,9 @@
 	free_irq(drv_data->irq, drv_data);
 	if (!IS_ERR_OR_NULL(drv_data->rstc))
 		reset_control_assert(drv_data->rstc);
-#if defined(CONFIG_HAVE_CLK)
 	/* Not all platforms have a clk */
-	if (!IS_ERR(drv_data->clk)) {
-		clk_disable(drv_data->clk);
-		clk_unprepare(drv_data->clk);
-	}
-#endif
+	if (!IS_ERR(drv_data->clk))
+		clk_disable_unprepare(drv_data->clk);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c
index 70b3c91..42fcc94 100644
--- a/drivers/i2c/busses/i2c-nforce2.c
+++ b/drivers/i2c/busses/i2c-nforce2.c
@@ -127,7 +127,7 @@
 
 /* For multiplexing support, we need a global reference to the 1st
    SMBus channel */
-#if defined CONFIG_I2C_NFORCE2_S4985 || defined CONFIG_I2C_NFORCE2_S4985_MODULE
+#if IS_ENABLED(CONFIG_I2C_NFORCE2_S4985)
 struct i2c_adapter *nforce2_smbus;
 EXPORT_SYMBOL_GPL(nforce2_smbus);
 
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 11b7b87..dfa7a4b 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -178,10 +178,7 @@
 		if (i2c->nmsgs) {	/* end? */
 			/* send start? */
 			if (!(msg->flags & I2C_M_NOSTART)) {
-				u8 addr = (msg->addr << 1);
-
-				if (msg->flags & I2C_M_RD)
-					addr |= 1;
+				u8 addr = i2c_8bit_addr_from_msg(msg);
 
 				i2c->state = STATE_START;
 
diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c
index 46fb6c4..aa5f01e 100644
--- a/drivers/i2c/busses/i2c-octeon.c
+++ b/drivers/i2c/busses/i2c-octeon.c
@@ -11,6 +11,7 @@
  * warranty of any kind, whether express or implied.
  */
 
+#include <linux/atomic.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
@@ -29,13 +30,23 @@
 /* Register offsets */
 #define SW_TWSI			0x00
 #define TWSI_INT		0x10
+#define SW_TWSI_EXT		0x18
 
 /* Controller command patterns */
 #define SW_TWSI_V		BIT_ULL(63)	/* Valid bit */
+#define SW_TWSI_EIA		BIT_ULL(61)	/* Extended internal address */
 #define SW_TWSI_R		BIT_ULL(56)	/* Result or read bit */
+#define SW_TWSI_SOVR		BIT_ULL(55)	/* Size override */
+#define SW_TWSI_SIZE_SHIFT	52
+#define SW_TWSI_ADDR_SHIFT	40
+#define SW_TWSI_IA_SHIFT	32		/* Internal address */
 
 /* Controller opcode word (bits 60:57) */
 #define SW_TWSI_OP_SHIFT	57
+#define SW_TWSI_OP_7		(0ULL << SW_TWSI_OP_SHIFT)
+#define SW_TWSI_OP_7_IA		(1ULL << SW_TWSI_OP_SHIFT)
+#define SW_TWSI_OP_10		(2ULL << SW_TWSI_OP_SHIFT)
+#define SW_TWSI_OP_10_IA	(3ULL << SW_TWSI_OP_SHIFT)
 #define SW_TWSI_OP_TWSI_CLK	(4ULL << SW_TWSI_OP_SHIFT)
 #define SW_TWSI_OP_EOP		(6ULL << SW_TWSI_OP_SHIFT) /* Extended opcode */
 
@@ -48,46 +59,93 @@
 #define SW_TWSI_EOP_TWSI_RST	(SW_TWSI_OP_EOP | 7ULL << SW_TWSI_EOP_SHIFT)
 
 /* Controller command and status bits */
-#define TWSI_CTL_CE		0x80
+#define TWSI_CTL_CE		0x80	/* High level controller enable */
 #define TWSI_CTL_ENAB		0x40	/* Bus enable */
 #define TWSI_CTL_STA		0x20	/* Master-mode start, HW clears when done */
 #define TWSI_CTL_STP		0x10	/* Master-mode stop, HW clears when done */
 #define TWSI_CTL_IFLG		0x08	/* HW event, SW writes 0 to ACK */
 #define TWSI_CTL_AAK		0x04	/* Assert ACK */
 
-/* Some status values */
+/* Status values */
+#define STAT_ERROR		0x00
 #define STAT_START		0x08
-#define STAT_RSTART		0x10
+#define STAT_REP_START		0x10
 #define STAT_TXADDR_ACK		0x18
+#define STAT_TXADDR_NAK		0x20
 #define STAT_TXDATA_ACK		0x28
+#define STAT_TXDATA_NAK		0x30
+#define STAT_LOST_ARB_38	0x38
 #define STAT_RXADDR_ACK		0x40
+#define STAT_RXADDR_NAK		0x48
 #define STAT_RXDATA_ACK		0x50
+#define STAT_RXDATA_NAK		0x58
+#define STAT_SLAVE_60		0x60
+#define STAT_LOST_ARB_68	0x68
+#define STAT_SLAVE_70		0x70
+#define STAT_LOST_ARB_78	0x78
+#define STAT_SLAVE_80		0x80
+#define STAT_SLAVE_88		0x88
+#define STAT_GENDATA_ACK	0x90
+#define STAT_GENDATA_NAK	0x98
+#define STAT_SLAVE_A0		0xA0
+#define STAT_SLAVE_A8		0xA8
+#define STAT_LOST_ARB_B0	0xB0
+#define STAT_SLAVE_LOST		0xB8
+#define STAT_SLAVE_NAK		0xC0
+#define STAT_SLAVE_ACK		0xC8
+#define STAT_AD2W_ACK		0xD0
+#define STAT_AD2W_NAK		0xD8
 #define STAT_IDLE		0xF8
 
 /* TWSI_INT values */
+#define TWSI_INT_ST_INT		BIT_ULL(0)
+#define TWSI_INT_TS_INT		BIT_ULL(1)
+#define TWSI_INT_CORE_INT	BIT_ULL(2)
+#define TWSI_INT_ST_EN		BIT_ULL(4)
+#define TWSI_INT_TS_EN		BIT_ULL(5)
 #define TWSI_INT_CORE_EN	BIT_ULL(6)
 #define TWSI_INT_SDA_OVR	BIT_ULL(8)
 #define TWSI_INT_SCL_OVR	BIT_ULL(9)
+#define TWSI_INT_SDA		BIT_ULL(10)
+#define TWSI_INT_SCL		BIT_ULL(11)
+
+#define I2C_OCTEON_EVENT_WAIT 80 /* microseconds */
 
 struct octeon_i2c {
 	wait_queue_head_t queue;
 	struct i2c_adapter adap;
 	int irq;
+	int hlc_irq;		/* For cn7890 only */
 	u32 twsi_freq;
 	int sys_freq;
 	void __iomem *twsi_base;
 	struct device *dev;
+	bool hlc_enabled;
+	bool broken_irq_mode;
+	bool broken_irq_check;
+	void (*int_enable)(struct octeon_i2c *);
+	void (*int_disable)(struct octeon_i2c *);
+	void (*hlc_int_enable)(struct octeon_i2c *);
+	void (*hlc_int_disable)(struct octeon_i2c *);
+	atomic_t int_enable_cnt;
+	atomic_t hlc_int_enable_cnt;
 };
 
+static void octeon_i2c_writeq_flush(u64 val, void __iomem *addr)
+{
+	__raw_writeq(val, addr);
+	__raw_readq(addr);	/* wait for write to land */
+}
+
 /**
- * octeon_i2c_write_sw - write an I2C core register
+ * octeon_i2c_reg_write - write an I2C core register
  * @i2c: The struct octeon_i2c
  * @eop_reg: Register selector
  * @data: Value to be written
  *
  * The I2C core registers are accessed indirectly via the SW_TWSI CSR.
  */
-static void octeon_i2c_write_sw(struct octeon_i2c *i2c, u64 eop_reg, u8 data)
+static void octeon_i2c_reg_write(struct octeon_i2c *i2c, u64 eop_reg, u8 data)
 {
 	u64 tmp;
 
@@ -97,8 +155,13 @@
 	} while ((tmp & SW_TWSI_V) != 0);
 }
 
+#define octeon_i2c_ctl_write(i2c, val)					\
+	octeon_i2c_reg_write(i2c, SW_TWSI_EOP_TWSI_CTL, val)
+#define octeon_i2c_data_write(i2c, val)					\
+	octeon_i2c_reg_write(i2c, SW_TWSI_EOP_TWSI_DATA, val)
+
 /**
- * octeon_i2c_read_sw - read lower bits of an I2C core register
+ * octeon_i2c_reg_read - read lower bits of an I2C core register
  * @i2c: The struct octeon_i2c
  * @eop_reg: Register selector
  *
@@ -106,7 +169,7 @@
  *
  * The I2C core registers are accessed indirectly via the SW_TWSI CSR.
  */
-static u8 octeon_i2c_read_sw(struct octeon_i2c *i2c, u64 eop_reg)
+static u8 octeon_i2c_reg_read(struct octeon_i2c *i2c, u64 eop_reg)
 {
 	u64 tmp;
 
@@ -118,6 +181,24 @@
 	return tmp & 0xFF;
 }
 
+#define octeon_i2c_ctl_read(i2c)					\
+	octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_CTL)
+#define octeon_i2c_data_read(i2c)					\
+	octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_DATA)
+#define octeon_i2c_stat_read(i2c)					\
+	octeon_i2c_reg_read(i2c, SW_TWSI_EOP_TWSI_STAT)
+
+/**
+ * octeon_i2c_read_int - read the TWSI_INT register
+ * @i2c: The struct octeon_i2c
+ *
+ * Returns the value of the register.
+ */
+static u64 octeon_i2c_read_int(struct octeon_i2c *i2c)
+{
+	return __raw_readq(i2c->twsi_base + TWSI_INT);
+}
+
 /**
  * octeon_i2c_write_int - write the TWSI_INT register
  * @i2c: The struct octeon_i2c
@@ -125,8 +206,7 @@
  */
 static void octeon_i2c_write_int(struct octeon_i2c *i2c, u64 data)
 {
-	__raw_writeq(data, i2c->twsi_base + TWSI_INT);
-	__raw_readq(i2c->twsi_base + TWSI_INT);
+	octeon_i2c_writeq_flush(data, i2c->twsi_base + TWSI_INT);
 }
 
 /**
@@ -149,30 +229,96 @@
 }
 
 /**
- * octeon_i2c_unblock - unblock the bus
+ * octeon_i2c_int_enable78 - enable the CORE interrupt
  * @i2c: The struct octeon_i2c
  *
- * If there was a reset while a device was driving 0 to bus, bus is blocked.
- * We toggle it free manually by some clock cycles and send a stop.
+ * The interrupt will be asserted when there is non-STAT_IDLE state in the
+ * SW_TWSI_EOP_TWSI_STAT register.
  */
-static void octeon_i2c_unblock(struct octeon_i2c *i2c)
+static void octeon_i2c_int_enable78(struct octeon_i2c *i2c)
 {
-	int i;
+	atomic_inc_return(&i2c->int_enable_cnt);
+	enable_irq(i2c->irq);
+}
 
-	dev_dbg(i2c->dev, "%s\n", __func__);
+static void __octeon_i2c_irq_disable(atomic_t *cnt, int irq)
+{
+	int count;
 
-	for (i = 0; i < 9; i++) {
-		octeon_i2c_write_int(i2c, 0);
-		udelay(5);
-		octeon_i2c_write_int(i2c, TWSI_INT_SCL_OVR);
-		udelay(5);
+	/*
+	 * The interrupt can be disabled in two places, but we only
+	 * want to make the disable_irq_nosync() call once, so keep
+	 * track with the atomic variable.
+	 */
+	count = atomic_dec_if_positive(cnt);
+	if (count >= 0)
+		disable_irq_nosync(irq);
+}
+
+/* disable the CORE interrupt */
+static void octeon_i2c_int_disable78(struct octeon_i2c *i2c)
+{
+	__octeon_i2c_irq_disable(&i2c->int_enable_cnt, i2c->irq);
+}
+
+/**
+ * octeon_i2c_hlc_int_enable78 - enable the ST interrupt
+ * @i2c: The struct octeon_i2c
+ *
+ * The interrupt will be asserted when there is non-STAT_IDLE state in
+ * the SW_TWSI_EOP_TWSI_STAT register.
+ */
+static void octeon_i2c_hlc_int_enable78(struct octeon_i2c *i2c)
+{
+	atomic_inc_return(&i2c->hlc_int_enable_cnt);
+	enable_irq(i2c->hlc_irq);
+}
+
+/* disable the ST interrupt */
+static void octeon_i2c_hlc_int_disable78(struct octeon_i2c *i2c)
+{
+	__octeon_i2c_irq_disable(&i2c->hlc_int_enable_cnt, i2c->hlc_irq);
+}
+
+/*
+ * Clean up low-level state & enable high-level controller.
+ */
+static void octeon_i2c_hlc_enable(struct octeon_i2c *i2c)
+{
+	int try = 0;
+	u64 val;
+
+	if (i2c->hlc_enabled)
+		return;
+	i2c->hlc_enabled = true;
+
+	while (1) {
+		val = octeon_i2c_ctl_read(i2c);
+		if (!(val & (TWSI_CTL_STA | TWSI_CTL_STP)))
+			break;
+
+		/* clear IFLG event */
+		if (val & TWSI_CTL_IFLG)
+			octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB);
+
+		if (try++ > 100) {
+			pr_err("%s: giving up\n", __func__);
+			break;
+		}
+
+		/* spin until any start/stop has finished */
+		udelay(10);
 	}
-	/* hand-crank a STOP */
-	octeon_i2c_write_int(i2c, TWSI_INT_SDA_OVR | TWSI_INT_SCL_OVR);
-	udelay(5);
-	octeon_i2c_write_int(i2c, TWSI_INT_SDA_OVR);
-	udelay(5);
-	octeon_i2c_write_int(i2c, 0);
+	octeon_i2c_ctl_write(i2c, TWSI_CTL_CE | TWSI_CTL_AAK | TWSI_CTL_ENAB);
+}
+
+static void octeon_i2c_hlc_disable(struct octeon_i2c *i2c)
+{
+	if (!i2c->hlc_enabled)
+		return;
+
+	i2c->hlc_enabled = false;
+	octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB);
 }
 
 /* interrupt service routine */
@@ -180,16 +326,44 @@
 {
 	struct octeon_i2c *i2c = dev_id;
 
-	octeon_i2c_int_disable(i2c);
+	i2c->int_disable(i2c);
 	wake_up(&i2c->queue);
 
 	return IRQ_HANDLED;
 }
 
-
-static int octeon_i2c_test_iflg(struct octeon_i2c *i2c)
+/* HLC interrupt service routine */
+static irqreturn_t octeon_i2c_hlc_isr78(int irq, void *dev_id)
 {
-	return (octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_CTL) & TWSI_CTL_IFLG) != 0;
+	struct octeon_i2c *i2c = dev_id;
+
+	i2c->hlc_int_disable(i2c);
+	wake_up(&i2c->queue);
+
+	return IRQ_HANDLED;
+}
+
+static bool octeon_i2c_test_iflg(struct octeon_i2c *i2c)
+{
+	return (octeon_i2c_ctl_read(i2c) & TWSI_CTL_IFLG);
+}
+
+static bool octeon_i2c_test_ready(struct octeon_i2c *i2c, bool *first)
+{
+	if (octeon_i2c_test_iflg(i2c))
+		return true;
+
+	if (*first) {
+		*first = false;
+		return false;
+	}
+
+	/*
+	 * IRQ has signaled an event but IFLG hasn't changed.
+	 * Sleep and retry once.
+	 */
+	usleep_range(I2C_OCTEON_EVENT_WAIT, 2 * I2C_OCTEON_EVENT_WAIT);
+	return octeon_i2c_test_iflg(i2c);
 }
 
 /**
@@ -201,233 +375,379 @@
 static int octeon_i2c_wait(struct octeon_i2c *i2c)
 {
 	long time_left;
+	bool first = 1;
 
-	octeon_i2c_int_enable(i2c);
-	time_left = wait_event_timeout(i2c->queue, octeon_i2c_test_iflg(i2c),
-				       i2c->adap.timeout);
-	octeon_i2c_int_disable(i2c);
-	if (!time_left) {
-		dev_dbg(i2c->dev, "%s: timeout\n", __func__);
-		return -ETIMEDOUT;
+	/*
+	 * Some chip revisions don't assert the irq in the interrupt
+	 * controller. So we must poll for the IFLG change.
+	 */
+	if (i2c->broken_irq_mode) {
+		u64 end = get_jiffies_64() + i2c->adap.timeout;
+
+		while (!octeon_i2c_test_iflg(i2c) &&
+		       time_before64(get_jiffies_64(), end))
+			usleep_range(I2C_OCTEON_EVENT_WAIT / 2, I2C_OCTEON_EVENT_WAIT);
+
+		return octeon_i2c_test_iflg(i2c) ? 0 : -ETIMEDOUT;
 	}
 
+	i2c->int_enable(i2c);
+	time_left = wait_event_timeout(i2c->queue, octeon_i2c_test_ready(i2c, &first),
+				       i2c->adap.timeout);
+	i2c->int_disable(i2c);
+
+	if (i2c->broken_irq_check && !time_left &&
+	    octeon_i2c_test_iflg(i2c)) {
+		dev_err(i2c->dev, "broken irq connection detected, switching to polling mode.\n");
+		i2c->broken_irq_mode = true;
+		return 0;
+	}
+
+	if (!time_left)
+		return -ETIMEDOUT;
+
 	return 0;
 }
 
-/**
- * octeon_i2c_start - send START to the bus
- * @i2c: The struct octeon_i2c
- *
- * Returns 0 on success, otherwise a negative errno.
- */
-static int octeon_i2c_start(struct octeon_i2c *i2c)
+static int octeon_i2c_check_status(struct octeon_i2c *i2c, int final_read)
 {
-	int result;
-	u8 data;
+	u8 stat = octeon_i2c_stat_read(i2c);
 
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-			    TWSI_CTL_ENAB | TWSI_CTL_STA);
+	switch (stat) {
+	/* Everything is fine */
+	case STAT_IDLE:
+	case STAT_AD2W_ACK:
+	case STAT_RXADDR_ACK:
+	case STAT_TXADDR_ACK:
+	case STAT_TXDATA_ACK:
+		return 0;
 
-	result = octeon_i2c_wait(i2c);
-	if (result) {
-		if (octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT) == STAT_IDLE) {
-			/*
-			 * Controller refused to send start flag May
-			 * be a client is holding SDA low - let's try
-			 * to free it.
-			 */
-			octeon_i2c_unblock(i2c);
-			octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-					    TWSI_CTL_ENAB | TWSI_CTL_STA);
-			result = octeon_i2c_wait(i2c);
-		}
-		if (result)
-			return result;
-	}
+	/* ACK allowed on pre-terminal bytes only */
+	case STAT_RXDATA_ACK:
+		if (!final_read)
+			return 0;
+		return -EIO;
 
-	data = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT);
-	if ((data != STAT_START) && (data != STAT_RSTART)) {
-		dev_err(i2c->dev, "%s: bad status (0x%x)\n", __func__, data);
+	/* NAK allowed on terminal byte only */
+	case STAT_RXDATA_NAK:
+		if (final_read)
+			return 0;
+		return -EIO;
+
+	/* Arbitration lost */
+	case STAT_LOST_ARB_38:
+	case STAT_LOST_ARB_68:
+	case STAT_LOST_ARB_78:
+	case STAT_LOST_ARB_B0:
+		return -EAGAIN;
+
+	/* Being addressed as slave, should back off & listen */
+	case STAT_SLAVE_60:
+	case STAT_SLAVE_70:
+	case STAT_GENDATA_ACK:
+	case STAT_GENDATA_NAK:
+		return -EOPNOTSUPP;
+
+	/* Core busy as slave */
+	case STAT_SLAVE_80:
+	case STAT_SLAVE_88:
+	case STAT_SLAVE_A0:
+	case STAT_SLAVE_A8:
+	case STAT_SLAVE_LOST:
+	case STAT_SLAVE_NAK:
+	case STAT_SLAVE_ACK:
+		return -EOPNOTSUPP;
+
+	case STAT_TXDATA_NAK:
+		return -EIO;
+	case STAT_TXADDR_NAK:
+	case STAT_RXADDR_NAK:
+	case STAT_AD2W_NAK:
+		return -ENXIO;
+	default:
+		dev_err(i2c->dev, "unhandled state: %d\n", stat);
 		return -EIO;
 	}
-
-	return 0;
 }
 
-/* send STOP to the bus */
-static void octeon_i2c_stop(struct octeon_i2c *i2c)
+static bool octeon_i2c_hlc_test_valid(struct octeon_i2c *i2c)
 {
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-			    TWSI_CTL_ENAB | TWSI_CTL_STP);
+	return (__raw_readq(i2c->twsi_base + SW_TWSI) & SW_TWSI_V) == 0;
+}
+
+static bool octeon_i2c_hlc_test_ready(struct octeon_i2c *i2c, bool *first)
+{
+	/* check if valid bit is cleared */
+	if (octeon_i2c_hlc_test_valid(i2c))
+		return true;
+
+	if (*first) {
+		*first = false;
+		return false;
+	}
+
+	/*
+	 * IRQ has signaled an event but valid bit isn't cleared.
+	 * Sleep and retry once.
+	 */
+	usleep_range(I2C_OCTEON_EVENT_WAIT, 2 * I2C_OCTEON_EVENT_WAIT);
+	return octeon_i2c_hlc_test_valid(i2c);
+}
+
+static void octeon_i2c_hlc_int_enable(struct octeon_i2c *i2c)
+{
+	octeon_i2c_write_int(i2c, TWSI_INT_ST_EN);
+}
+
+static void octeon_i2c_hlc_int_clear(struct octeon_i2c *i2c)
+{
+	/* clear ST/TS events, listen for neither */
+	octeon_i2c_write_int(i2c, TWSI_INT_ST_INT | TWSI_INT_TS_INT);
 }
 
 /**
- * octeon_i2c_write - send data to the bus via low-level controller
+ * octeon_i2c_hlc_wait - wait for an HLC operation to complete
  * @i2c: The struct octeon_i2c
- * @target: Target address
- * @data: Pointer to the data to be sent
- * @length: Length of the data
  *
- * The address is sent over the bus, then the data.
- *
- * Returns 0 on success, otherwise a negative errno.
+ * Returns 0 on success, otherwise -ETIMEDOUT.
  */
-static int octeon_i2c_write(struct octeon_i2c *i2c, int target,
-			    const u8 *data, int length)
+static int octeon_i2c_hlc_wait(struct octeon_i2c *i2c)
 {
-	int i, result;
-	u8 tmp;
+	bool first = 1;
+	int time_left;
 
-	result = octeon_i2c_start(i2c);
-	if (result)
-		return result;
+	/*
+	 * Some cn38xx boards don't assert the irq in the interrupt
+	 * controller. So we must poll for the valid bit change.
+	 */
+	if (i2c->broken_irq_mode) {
+		u64 end = get_jiffies_64() + i2c->adap.timeout;
 
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_DATA, target << 1);
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL, TWSI_CTL_ENAB);
+		while (!octeon_i2c_hlc_test_valid(i2c) &&
+		       time_before64(get_jiffies_64(), end))
+			usleep_range(I2C_OCTEON_EVENT_WAIT / 2, I2C_OCTEON_EVENT_WAIT);
 
-	result = octeon_i2c_wait(i2c);
-	if (result)
-		return result;
-
-	for (i = 0; i < length; i++) {
-		tmp = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT);
-
-		if ((tmp != STAT_TXADDR_ACK) && (tmp != STAT_TXDATA_ACK)) {
-			dev_err(i2c->dev,
-				"%s: bad status before write (0x%x)\n",
-				__func__, tmp);
-			return -EIO;
-		}
-
-		octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_DATA, data[i]);
-		octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL, TWSI_CTL_ENAB);
-
-		result = octeon_i2c_wait(i2c);
-		if (result)
-			return result;
+		return octeon_i2c_hlc_test_valid(i2c) ? 0 : -ETIMEDOUT;
 	}
 
+	i2c->hlc_int_enable(i2c);
+	time_left = wait_event_timeout(i2c->queue,
+				       octeon_i2c_hlc_test_ready(i2c, &first),
+				       i2c->adap.timeout);
+	i2c->hlc_int_disable(i2c);
+	if (!time_left)
+		octeon_i2c_hlc_int_clear(i2c);
+
+	if (i2c->broken_irq_check && !time_left &&
+	    octeon_i2c_hlc_test_valid(i2c)) {
+		dev_err(i2c->dev, "broken irq connection detected, switching to polling mode.\n");
+		i2c->broken_irq_mode = true;
+		return 0;
+	}
+
+	if (!time_left)
+		return -ETIMEDOUT;
 	return 0;
 }
 
-/**
- * octeon_i2c_read - receive data from the bus via low-level controller
- * @i2c: The struct octeon_i2c
- * @target: Target address
- * @data: Pointer to the location to store the data
- * @rlength: Length of the data
- * @recv_len: flag for length byte
- *
- * The address is sent over the bus, then the data is read.
- *
- * Returns 0 on success, otherwise a negative errno.
- */
-static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
-			   u8 *data, u16 *rlength, bool recv_len)
+/* high-level-controller pure read of up to 8 bytes */
+static int octeon_i2c_hlc_read(struct octeon_i2c *i2c, struct i2c_msg *msgs)
 {
-	int i, result, length = *rlength;
-	u8 tmp;
+	int i, j, ret = 0;
+	u64 cmd;
 
-	if (length < 1)
-		return -EINVAL;
+	octeon_i2c_hlc_enable(i2c);
+	octeon_i2c_hlc_int_clear(i2c);
 
-	result = octeon_i2c_start(i2c);
-	if (result)
-		return result;
+	cmd = SW_TWSI_V | SW_TWSI_R | SW_TWSI_SOVR;
+	/* SIZE */
+	cmd |= (u64)(msgs[0].len - 1) << SW_TWSI_SIZE_SHIFT;
+	/* A */
+	cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT;
 
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_DATA, (target << 1) | 1);
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL, TWSI_CTL_ENAB);
+	if (msgs[0].flags & I2C_M_TEN)
+		cmd |= SW_TWSI_OP_10;
+	else
+		cmd |= SW_TWSI_OP_7;
 
-	result = octeon_i2c_wait(i2c);
-	if (result)
-		return result;
+	octeon_i2c_writeq_flush(cmd, i2c->twsi_base + SW_TWSI);
+	ret = octeon_i2c_hlc_wait(i2c);
+	if (ret)
+		goto err;
 
-	for (i = 0; i < length; i++) {
-		tmp = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT);
+	cmd = __raw_readq(i2c->twsi_base + SW_TWSI);
+	if ((cmd & SW_TWSI_R) == 0)
+		return -EAGAIN;
 
-		if ((tmp != STAT_RXDATA_ACK) && (tmp != STAT_RXADDR_ACK)) {
-			dev_err(i2c->dev,
-				"%s: bad status before read (0x%x)\n",
-				__func__, tmp);
-			return -EIO;
-		}
+	for (i = 0, j = msgs[0].len - 1; i  < msgs[0].len && i < 4; i++, j--)
+		msgs[0].buf[j] = (cmd >> (8 * i)) & 0xff;
 
-		if (i + 1 < length)
-			octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-					    TWSI_CTL_ENAB | TWSI_CTL_AAK);
-		else
-			octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-					    TWSI_CTL_ENAB);
-
-		result = octeon_i2c_wait(i2c);
-		if (result)
-			return result;
-
-		data[i] = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_DATA);
-		if (recv_len && i == 0) {
-			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) {
-				dev_err(i2c->dev,
-					"%s: read len > I2C_SMBUS_BLOCK_MAX %d\n",
-					__func__, data[i]);
-				return -EPROTO;
-			}
-			length += data[i];
-		}
+	if (msgs[0].len > 4) {
+		cmd = __raw_readq(i2c->twsi_base + SW_TWSI_EXT);
+		for (i = 0; i  < msgs[0].len - 4 && i < 4; i++, j--)
+			msgs[0].buf[j] = (cmd >> (8 * i)) & 0xff;
 	}
-	*rlength = length;
-	return 0;
+
+err:
+	return ret;
 }
 
-/**
- * octeon_i2c_xfer - The driver's master_xfer function
- * @adap: Pointer to the i2c_adapter structure
- * @msgs: Pointer to the messages to be processed
- * @num: Length of the MSGS array
- *
- * Returns the number of messages processed, or a negative errno on failure.
- */
-static int octeon_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
-			   int num)
+/* high-level-controller pure write of up to 8 bytes */
+static int octeon_i2c_hlc_write(struct octeon_i2c *i2c, struct i2c_msg *msgs)
 {
-	struct octeon_i2c *i2c = i2c_get_adapdata(adap);
-	int i, ret = 0;
+	int i, j, ret = 0;
+	u64 cmd;
 
-	for (i = 0; ret == 0 && i < num; i++) {
-		struct i2c_msg *pmsg = &msgs[i];
+	octeon_i2c_hlc_enable(i2c);
+	octeon_i2c_hlc_int_clear(i2c);
 
-		dev_dbg(i2c->dev,
-			"Doing %s %d byte(s) to/from 0x%02x - %d of %d messages\n",
-			 pmsg->flags & I2C_M_RD ? "read" : "write",
-			 pmsg->len, pmsg->addr, i + 1, num);
-		if (pmsg->flags & I2C_M_RD)
-			ret = octeon_i2c_read(i2c, pmsg->addr, pmsg->buf,
-					      &pmsg->len, pmsg->flags & I2C_M_RECV_LEN);
-		else
-			ret = octeon_i2c_write(i2c, pmsg->addr, pmsg->buf,
-					       pmsg->len);
+	cmd = SW_TWSI_V | SW_TWSI_SOVR;
+	/* SIZE */
+	cmd |= (u64)(msgs[0].len - 1) << SW_TWSI_SIZE_SHIFT;
+	/* A */
+	cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT;
+
+	if (msgs[0].flags & I2C_M_TEN)
+		cmd |= SW_TWSI_OP_10;
+	else
+		cmd |= SW_TWSI_OP_7;
+
+	for (i = 0, j = msgs[0].len - 1; i  < msgs[0].len && i < 4; i++, j--)
+		cmd |= (u64)msgs[0].buf[j] << (8 * i);
+
+	if (msgs[0].len > 4) {
+		u64 ext = 0;
+
+		for (i = 0; i < msgs[0].len - 4 && i < 4; i++, j--)
+			ext |= (u64)msgs[0].buf[j] << (8 * i);
+		octeon_i2c_writeq_flush(ext, i2c->twsi_base + SW_TWSI_EXT);
 	}
-	octeon_i2c_stop(i2c);
 
-	return (ret != 0) ? ret : num;
+	octeon_i2c_writeq_flush(cmd, i2c->twsi_base + SW_TWSI);
+	ret = octeon_i2c_hlc_wait(i2c);
+	if (ret)
+		goto err;
+
+	cmd = __raw_readq(i2c->twsi_base + SW_TWSI);
+	if ((cmd & SW_TWSI_R) == 0)
+		return -EAGAIN;
+
+	ret = octeon_i2c_check_status(i2c, false);
+
+err:
+	return ret;
 }
 
-static u32 octeon_i2c_functionality(struct i2c_adapter *adap)
+/* high-level-controller composite write+read, msg0=addr, msg1=data */
+static int octeon_i2c_hlc_comp_read(struct octeon_i2c *i2c, struct i2c_msg *msgs)
 {
-	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL |
-	       I2C_FUNC_SMBUS_READ_BLOCK_DATA | I2C_SMBUS_BLOCK_PROC_CALL;
+	int i, j, ret = 0;
+	u64 cmd;
+
+	octeon_i2c_hlc_enable(i2c);
+
+	cmd = SW_TWSI_V | SW_TWSI_R | SW_TWSI_SOVR;
+	/* SIZE */
+	cmd |= (u64)(msgs[1].len - 1) << SW_TWSI_SIZE_SHIFT;
+	/* A */
+	cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT;
+
+	if (msgs[0].flags & I2C_M_TEN)
+		cmd |= SW_TWSI_OP_10_IA;
+	else
+		cmd |= SW_TWSI_OP_7_IA;
+
+	if (msgs[0].len == 2) {
+		u64 ext = 0;
+
+		cmd |= SW_TWSI_EIA;
+		ext = (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT;
+		cmd |= (u64)msgs[0].buf[1] << SW_TWSI_IA_SHIFT;
+		octeon_i2c_writeq_flush(ext, i2c->twsi_base + SW_TWSI_EXT);
+	} else {
+		cmd |= (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT;
+	}
+
+	octeon_i2c_hlc_int_clear(i2c);
+	octeon_i2c_writeq_flush(cmd, i2c->twsi_base + SW_TWSI);
+
+	ret = octeon_i2c_hlc_wait(i2c);
+	if (ret)
+		goto err;
+
+	cmd = __raw_readq(i2c->twsi_base + SW_TWSI);
+	if ((cmd & SW_TWSI_R) == 0)
+		return -EAGAIN;
+
+	for (i = 0, j = msgs[1].len - 1; i  < msgs[1].len && i < 4; i++, j--)
+		msgs[1].buf[j] = (cmd >> (8 * i)) & 0xff;
+
+	if (msgs[1].len > 4) {
+		cmd = __raw_readq(i2c->twsi_base + SW_TWSI_EXT);
+		for (i = 0; i  < msgs[1].len - 4 && i < 4; i++, j--)
+			msgs[1].buf[j] = (cmd >> (8 * i)) & 0xff;
+	}
+
+err:
+	return ret;
 }
 
-static const struct i2c_algorithm octeon_i2c_algo = {
-	.master_xfer = octeon_i2c_xfer,
-	.functionality = octeon_i2c_functionality,
-};
+/* high-level-controller composite write+write, m[0]len<=2, m[1]len<=8 */
+static int octeon_i2c_hlc_comp_write(struct octeon_i2c *i2c, struct i2c_msg *msgs)
+{
+	bool set_ext = false;
+	int i, j, ret = 0;
+	u64 cmd, ext = 0;
 
-static struct i2c_adapter octeon_i2c_ops = {
-	.owner = THIS_MODULE,
-	.name = "OCTEON adapter",
-	.algo = &octeon_i2c_algo,
-	.timeout = HZ / 50,
-};
+	octeon_i2c_hlc_enable(i2c);
+
+	cmd = SW_TWSI_V | SW_TWSI_SOVR;
+	/* SIZE */
+	cmd |= (u64)(msgs[1].len - 1) << SW_TWSI_SIZE_SHIFT;
+	/* A */
+	cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT;
+
+	if (msgs[0].flags & I2C_M_TEN)
+		cmd |= SW_TWSI_OP_10_IA;
+	else
+		cmd |= SW_TWSI_OP_7_IA;
+
+	if (msgs[0].len == 2) {
+		cmd |= SW_TWSI_EIA;
+		ext |= (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT;
+		set_ext = true;
+		cmd |= (u64)msgs[0].buf[1] << SW_TWSI_IA_SHIFT;
+	} else {
+		cmd |= (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT;
+	}
+
+	for (i = 0, j = msgs[1].len - 1; i  < msgs[1].len && i < 4; i++, j--)
+		cmd |= (u64)msgs[1].buf[j] << (8 * i);
+
+	if (msgs[1].len > 4) {
+		for (i = 0; i < msgs[1].len - 4 && i < 4; i++, j--)
+			ext |= (u64)msgs[1].buf[j] << (8 * i);
+		set_ext = true;
+	}
+	if (set_ext)
+		octeon_i2c_writeq_flush(ext, i2c->twsi_base + SW_TWSI_EXT);
+
+	octeon_i2c_hlc_int_clear(i2c);
+	octeon_i2c_writeq_flush(cmd, i2c->twsi_base + SW_TWSI);
+
+	ret = octeon_i2c_hlc_wait(i2c);
+	if (ret)
+		goto err;
+
+	cmd = __raw_readq(i2c->twsi_base + SW_TWSI);
+	if ((cmd & SW_TWSI_R) == 0)
+		return -EAGAIN;
+
+	ret = octeon_i2c_check_status(i2c, false);
+
+err:
+	return ret;
+}
 
 /* calculate and set clock divisors */
 static void octeon_i2c_set_clock(struct octeon_i2c *i2c)
@@ -467,42 +787,342 @@
 			}
 		}
 	}
-	octeon_i2c_write_sw(i2c, SW_TWSI_OP_TWSI_CLK, thp);
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CLKCTL, (mdiv << 3) | ndiv);
+	octeon_i2c_reg_write(i2c, SW_TWSI_OP_TWSI_CLK, thp);
+	octeon_i2c_reg_write(i2c, SW_TWSI_EOP_TWSI_CLKCTL, (mdiv << 3) | ndiv);
 }
 
 static int octeon_i2c_init_lowlevel(struct octeon_i2c *i2c)
 {
-	u8 status;
+	u8 status = 0;
 	int tries;
 
-	/* disable high level controller, enable bus access */
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL, TWSI_CTL_ENAB);
-
 	/* reset controller */
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_RST, 0);
+	octeon_i2c_reg_write(i2c, SW_TWSI_EOP_TWSI_RST, 0);
 
-	for (tries = 10; tries; tries--) {
+	for (tries = 10; tries && status != STAT_IDLE; tries--) {
 		udelay(1);
-		status = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT);
+		status = octeon_i2c_stat_read(i2c);
 		if (status == STAT_IDLE)
-			return 0;
+			break;
 	}
-	dev_err(i2c->dev, "%s: TWSI_RST failed! (0x%x)\n", __func__, status);
-	return -EIO;
+
+	if (status != STAT_IDLE) {
+		dev_err(i2c->dev, "%s: TWSI_RST failed! (0x%x)\n",
+			__func__, status);
+		return -EIO;
+	}
+
+	/* toggle twice to force both teardowns */
+	octeon_i2c_hlc_enable(i2c);
+	octeon_i2c_hlc_disable(i2c);
+	return 0;
 }
 
+static int octeon_i2c_recovery(struct octeon_i2c *i2c)
+{
+	int ret;
+
+	ret = i2c_recover_bus(&i2c->adap);
+	if (ret)
+		/* recover failed, try hardware re-init */
+		ret = octeon_i2c_init_lowlevel(i2c);
+	return ret;
+}
+
+/**
+ * octeon_i2c_start - send START to the bus
+ * @i2c: The struct octeon_i2c
+ *
+ * Returns 0 on success, otherwise a negative errno.
+ */
+static int octeon_i2c_start(struct octeon_i2c *i2c)
+{
+	int ret;
+	u8 stat;
+
+	octeon_i2c_hlc_disable(i2c);
+
+	octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB | TWSI_CTL_STA);
+	ret = octeon_i2c_wait(i2c);
+	if (ret)
+		goto error;
+
+	stat = octeon_i2c_stat_read(i2c);
+	if (stat == STAT_START || stat == STAT_REP_START)
+		/* START successful, bail out */
+		return 0;
+
+error:
+	/* START failed, try to recover */
+	ret = octeon_i2c_recovery(i2c);
+	return (ret) ? ret : -EAGAIN;
+}
+
+/* send STOP to the bus */
+static void octeon_i2c_stop(struct octeon_i2c *i2c)
+{
+	octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB | TWSI_CTL_STP);
+}
+
+/**
+ * octeon_i2c_write - send data to the bus via low-level controller
+ * @i2c: The struct octeon_i2c
+ * @target: Target address
+ * @data: Pointer to the data to be sent
+ * @length: Length of the data
+ *
+ * The address is sent over the bus, then the data.
+ *
+ * Returns 0 on success, otherwise a negative errno.
+ */
+static int octeon_i2c_write(struct octeon_i2c *i2c, int target,
+			    const u8 *data, int length)
+{
+	int i, result;
+
+	octeon_i2c_data_write(i2c, target << 1);
+	octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB);
+
+	result = octeon_i2c_wait(i2c);
+	if (result)
+		return result;
+
+	for (i = 0; i < length; i++) {
+		result = octeon_i2c_check_status(i2c, false);
+		if (result)
+			return result;
+
+		octeon_i2c_data_write(i2c, data[i]);
+		octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB);
+
+		result = octeon_i2c_wait(i2c);
+		if (result)
+			return result;
+	}
+
+	return 0;
+}
+
+/**
+ * octeon_i2c_read - receive data from the bus via low-level controller
+ * @i2c: The struct octeon_i2c
+ * @target: Target address
+ * @data: Pointer to the location to store the data
+ * @rlength: Length of the data
+ * @recv_len: flag for length byte
+ *
+ * The address is sent over the bus, then the data is read.
+ *
+ * Returns 0 on success, otherwise a negative errno.
+ */
+static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
+			   u8 *data, u16 *rlength, bool recv_len)
+{
+	int i, result, length = *rlength;
+	bool final_read = false;
+
+	octeon_i2c_data_write(i2c, (target << 1) | 1);
+	octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB);
+
+	result = octeon_i2c_wait(i2c);
+	if (result)
+		return result;
+
+	/* address OK ? */
+	result = octeon_i2c_check_status(i2c, false);
+	if (result)
+		return result;
+
+	for (i = 0; i < length; i++) {
+		/* last byte: no TWSI_CTL_AAK (RECV_LEN count byte excluded) */
+		if (i + 1 == length && !(recv_len && i == 0))
+			final_read = true;
+
+		/* clear iflg to allow next event */
+		if (final_read)
+			octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB);
+		else
+			octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB | TWSI_CTL_AAK);
+
+		result = octeon_i2c_wait(i2c);
+		if (result)
+			return result;
+
+		data[i] = octeon_i2c_data_read(i2c);
+		if (recv_len && i == 0) {
+			if (data[i] > I2C_SMBUS_BLOCK_MAX) {
+				dev_err(i2c->dev,
+					"%s: read len > I2C_SMBUS_BLOCK_MAX %d\n",
+					__func__, data[i]);
+				return -EPROTO;
+			}
+			length += data[i];
+		}
+
+		result = octeon_i2c_check_status(i2c, final_read);
+		if (result)
+			return result;
+	}
+	*rlength = length;
+	return 0;
+}
+
+/**
+ * octeon_i2c_xfer - The driver's master_xfer function
+ * @adap: Pointer to the i2c_adapter structure
+ * @msgs: Pointer to the messages to be processed
+ * @num: Length of the MSGS array
+ *
+ * Returns the number of messages processed, or a negative errno on failure.
+ */
+static int octeon_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+			   int num)
+{
+	struct octeon_i2c *i2c = i2c_get_adapdata(adap);
+	int i, ret = 0;
+
+	if (num == 1) {
+		if (msgs[0].len > 0 && msgs[0].len <= 8) {
+			if (msgs[0].flags & I2C_M_RD)
+				ret = octeon_i2c_hlc_read(i2c, msgs);
+			else
+				ret = octeon_i2c_hlc_write(i2c, msgs);
+			goto out;
+		}
+	} else if (num == 2) {
+		if ((msgs[0].flags & I2C_M_RD) == 0 &&
+		    (msgs[1].flags & I2C_M_RECV_LEN) == 0 &&
+		    msgs[0].len > 0 && msgs[0].len <= 2 &&
+		    msgs[1].len > 0 && msgs[1].len <= 8 &&
+		    msgs[0].addr == msgs[1].addr) {
+			if (msgs[1].flags & I2C_M_RD)
+				ret = octeon_i2c_hlc_comp_read(i2c, msgs);
+			else
+				ret = octeon_i2c_hlc_comp_write(i2c, msgs);
+			goto out;
+		}
+	}
+
+	for (i = 0; ret == 0 && i < num; i++) {
+		struct i2c_msg *pmsg = &msgs[i];
+
+		/* zero-length messages are not supported */
+		if (!pmsg->len) {
+			ret = -EOPNOTSUPP;
+			break;
+		}
+
+		ret = octeon_i2c_start(i2c);
+		if (ret)
+			return ret;
+
+		if (pmsg->flags & I2C_M_RD)
+			ret = octeon_i2c_read(i2c, pmsg->addr, pmsg->buf,
+					      &pmsg->len, pmsg->flags & I2C_M_RECV_LEN);
+		else
+			ret = octeon_i2c_write(i2c, pmsg->addr, pmsg->buf,
+					       pmsg->len);
+	}
+	octeon_i2c_stop(i2c);
+out:
+	return (ret != 0) ? ret : num;
+}
+
+static int octeon_i2c_get_scl(struct i2c_adapter *adap)
+{
+	struct octeon_i2c *i2c = i2c_get_adapdata(adap);
+	u64 state;
+
+	state = octeon_i2c_read_int(i2c);
+	return state & TWSI_INT_SCL;
+}
+
+/* recovery SCL setter: set the override bit for 0, clear it otherwise */
+static void octeon_i2c_set_scl(struct i2c_adapter *adap, int val)
+{
+	octeon_i2c_write_int(i2c_get_adapdata(adap),
+			     val ? 0 : TWSI_INT_SCL_OVR);
+}
+
+static int octeon_i2c_get_sda(struct i2c_adapter *adap)
+{
+	struct octeon_i2c *i2c = i2c_get_adapdata(adap);
+	u64 state;
+
+	state = octeon_i2c_read_int(i2c);
+	return state & TWSI_INT_SDA;
+}
+
+static void octeon_i2c_prepare_recovery(struct i2c_adapter *adap)
+{
+	struct octeon_i2c *i2c = i2c_get_adapdata(adap);
+
+	/*
+	 * The stop resets the state machine, does not _transmit_ STOP unless
+	 * engine was active.
+	 */
+	octeon_i2c_stop(i2c);
+
+	octeon_i2c_hlc_disable(i2c);
+	octeon_i2c_write_int(i2c, 0);
+}
+
+static void octeon_i2c_unprepare_recovery(struct i2c_adapter *adap)
+{
+	struct octeon_i2c *i2c = i2c_get_adapdata(adap);
+
+	octeon_i2c_write_int(i2c, 0);
+}
+
+static struct i2c_bus_recovery_info octeon_i2c_recovery_info = {
+	.recover_bus = i2c_generic_scl_recovery,
+	.get_scl = octeon_i2c_get_scl,
+	.set_scl = octeon_i2c_set_scl,
+	.get_sda = octeon_i2c_get_sda,
+	.prepare_recovery = octeon_i2c_prepare_recovery,
+	.unprepare_recovery = octeon_i2c_unprepare_recovery,
+};
+
+static u32 octeon_i2c_functionality(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK) |
+	       I2C_FUNC_SMBUS_READ_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL;
+}
+
+static const struct i2c_algorithm octeon_i2c_algo = {
+	.master_xfer = octeon_i2c_xfer,
+	.functionality = octeon_i2c_functionality,
+};
+
+static struct i2c_adapter octeon_i2c_ops = {
+	.owner = THIS_MODULE,
+	.name = "OCTEON adapter",
+	.algo = &octeon_i2c_algo,
+};
+
 static int octeon_i2c_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
+	int irq, result = 0, hlc_irq = 0;
 	struct resource *res_mem;
 	struct octeon_i2c *i2c;
-	int irq, result = 0;
+	bool cn78xx_style;
 
-	/* All adaptors have an irq.  */
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
+	cn78xx_style = of_device_is_compatible(node, "cavium,octeon-7890-twsi");
+	if (cn78xx_style) {
+		hlc_irq = platform_get_irq(pdev, 0);
+		if (hlc_irq < 0)
+			return hlc_irq;
+
+		irq = platform_get_irq(pdev, 2);
+		if (irq < 0)
+			return irq;
+	} else {
+		/* All adaptors have an irq.  */
+		irq = platform_get_irq(pdev, 0);
+		if (irq < 0)
+			return irq;
+	}
 
 	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
 	if (!i2c) {
@@ -537,6 +1157,31 @@
 
 	i2c->irq = irq;
 
+	if (cn78xx_style) {
+		i2c->hlc_irq = hlc_irq;
+
+		i2c->int_enable = octeon_i2c_int_enable78;
+		i2c->int_disable = octeon_i2c_int_disable78;
+		i2c->hlc_int_enable = octeon_i2c_hlc_int_enable78;
+		i2c->hlc_int_disable = octeon_i2c_hlc_int_disable78;
+
+		irq_set_status_flags(i2c->irq, IRQ_NOAUTOEN);
+		irq_set_status_flags(i2c->hlc_irq, IRQ_NOAUTOEN);
+
+		result = devm_request_irq(&pdev->dev, i2c->hlc_irq,
+					  octeon_i2c_hlc_isr78, 0,
+					  DRV_NAME, i2c);
+		if (result < 0) {
+			dev_err(i2c->dev, "failed to attach interrupt\n");
+			goto out;
+		}
+	} else {
+		i2c->int_enable = octeon_i2c_int_enable;
+		i2c->int_disable = octeon_i2c_int_disable;
+		i2c->hlc_int_enable = octeon_i2c_hlc_int_enable;
+		i2c->hlc_int_disable = octeon_i2c_int_disable;
+	}
+
 	result = devm_request_irq(&pdev->dev, i2c->irq,
 				  octeon_i2c_isr, 0, DRV_NAME, i2c);
 	if (result < 0) {
@@ -544,6 +1189,9 @@
 		goto out;
 	}
 
+	if (OCTEON_IS_MODEL(OCTEON_CN38XX))
+		i2c->broken_irq_check = true;
+
 	result = octeon_i2c_init_lowlevel(i2c);
 	if (result) {
 		dev_err(i2c->dev, "init low level failed\n");
@@ -553,6 +1201,9 @@
 	octeon_i2c_set_clock(i2c);
 
 	i2c->adap = octeon_i2c_ops;
+	i2c->adap.timeout = msecs_to_jiffies(2);
+	i2c->adap.retries = 5;
+	i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info;
 	i2c->adap.dev.parent = &pdev->dev;
 	i2c->adap.dev.of_node = node;
 	i2c_set_adapdata(&i2c->adap, i2c);
@@ -580,6 +1231,7 @@
 
 static const struct of_device_id octeon_i2c_match[] = {
 	{ .compatible = "cavium,octeon-3860-twsi", },
+	{ .compatible = "cavium,octeon-7890-twsi", },
 	{},
 };
 MODULE_DEVICE_TABLE(of, octeon_i2c_match);
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 13c4529..ab1279b 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -185,7 +185,6 @@
 #define OMAP_I2C_IP_V2_INTERRUPTS_MASK	0x6FFF
 
 struct omap_i2c_dev {
-	spinlock_t		lock;		/* IRQ synchronization */
 	struct device		*dev;
 	void __iomem		*base;		/* virtual */
 	int			irq;
@@ -995,15 +994,12 @@
 	u16 mask;
 	u16 stat;
 
-	spin_lock(&omap->lock);
-	mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
 	stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
+	mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
 
 	if (stat & mask)
 		ret = IRQ_WAKE_THREAD;
 
-	spin_unlock(&omap->lock);
-
 	return ret;
 }
 
@@ -1011,12 +1007,10 @@
 omap_i2c_isr_thread(int this_irq, void *dev_id)
 {
 	struct omap_i2c_dev *omap = dev_id;
-	unsigned long flags;
 	u16 bits;
 	u16 stat;
 	int err = 0, count = 0;
 
-	spin_lock_irqsave(&omap->lock, flags);
 	do {
 		bits = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
 		stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
@@ -1142,8 +1136,6 @@
 	omap_i2c_complete_cmd(omap, err);
 
 out:
-	spin_unlock_irqrestore(&omap->lock, flags);
-
 	return IRQ_HANDLED;
 }
 
@@ -1330,8 +1322,6 @@
 	omap->dev = &pdev->dev;
 	omap->irq = irq;
 
-	spin_lock_init(&omap->lock);
-
 	platform_set_drvdata(pdev, omap);
 	init_completion(&omap->cmd_complete);
 
diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c
index 6abcf69..b0d9dee 100644
--- a/drivers/i2c/busses/i2c-powermac.c
+++ b/drivers/i2c/busses/i2c-powermac.c
@@ -150,13 +150,11 @@
 {
 	struct pmac_i2c_bus	*bus = i2c_get_adapdata(adap);
 	int			rc = 0;
-	int			read;
 	int			addrdir;
 
 	if (msgs->flags & I2C_M_TEN)
 		return -EINVAL;
-	read = (msgs->flags & I2C_M_RD) != 0;
-	addrdir = (msgs->addr << 1) | read;
+	addrdir = i2c_8bit_addr_from_msg(msgs);
 
 	rc = pmac_i2c_open(bus, 0);
 	if (rc) {
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 23eaabb..cc6439a 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -515,7 +515,7 @@
 static int qup_i2c_set_tags(u8 *tags, struct qup_i2c_dev *qup,
 			    struct i2c_msg *msg,  int is_dma)
 {
-	u16 addr = (msg->addr << 1) | ((msg->flags & I2C_M_RD) == I2C_M_RD);
+	u16 addr = i2c_8bit_addr_from_msg(msg);
 	int len = 0;
 	int data_len;
 
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 68ecb56..9aca1b4 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -21,6 +21,8 @@
  */
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -43,6 +45,8 @@
 #define ICSAR	0x1C	/* slave address */
 #define ICMAR	0x20	/* master address */
 #define ICRXTX	0x24	/* data port */
+#define ICDMAER	0x3c	/* DMA enable */
+#define ICFBSCR	0x38	/* first bit setup cycle */
 
 /* ICSCR */
 #define SDBS	(1 << 3)	/* slave data buffer select */
@@ -78,6 +82,16 @@
 #define MDR	(1 << 1)
 #define MAT	(1 << 0)	/* slave addr xfer done */
 
+/* ICDMAER */
+#define RSDMAE	(1 << 3)	/* DMA Slave Received Enable */
+#define TSDMAE	(1 << 2)	/* DMA Slave Transmitted Enable */
+#define RMDMAE	(1 << 1)	/* DMA Master Received Enable */
+#define TMDMAE	(1 << 0)	/* DMA Master Transmitted Enable */
+
+/* ICFBSCR */
+#define TCYC06	0x04		/*  6*Tcyc delay 1st bit between SDA and SCL */
+#define TCYC17	0x0f		/* 17*Tcyc delay 1st bit between SDA and SCL */
+
 
 #define RCAR_BUS_PHASE_START	(MDBS | MIE | ESG)
 #define RCAR_BUS_PHASE_DATA	(MDBS | MIE)
@@ -120,6 +134,12 @@
 	u32 flags;
 	enum rcar_i2c_type devtype;
 	struct i2c_client *slave;
+
+	struct resource *res;
+	struct dma_chan *dma_tx;
+	struct dma_chan *dma_rx;
+	struct scatterlist sg;
+	enum dma_data_direction dma_direction;
 };
 
 #define rcar_i2c_priv_to_dev(p)		((p)->adap.dev.parent)
@@ -287,6 +307,118 @@
 /*
  *		interrupt functions
  */
+static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
+{
+	struct dma_chan *chan = priv->dma_direction == DMA_FROM_DEVICE
+		? priv->dma_rx : priv->dma_tx;
+
+	/* Disable DMA Master Received/Transmitted */
+	rcar_i2c_write(priv, ICDMAER, 0);
+
+	/* Reset default delay */
+	rcar_i2c_write(priv, ICFBSCR, TCYC06);
+
+	/* Unmap exactly what was mapped: the sg length, not the msg length */
+	dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg),
+			 sg_dma_len(&priv->sg), priv->dma_direction);
+	priv->dma_direction = DMA_NONE;
+}
+
+static void rcar_i2c_cleanup_dma(struct rcar_i2c_priv *priv)
+{
+	if (priv->dma_direction == DMA_NONE)
+		return;
+	else if (priv->dma_direction == DMA_FROM_DEVICE)
+		dmaengine_terminate_all(priv->dma_rx);
+	else if (priv->dma_direction == DMA_TO_DEVICE)
+		dmaengine_terminate_all(priv->dma_tx);
+
+	rcar_i2c_dma_unmap(priv);
+}
+
+static void rcar_i2c_dma_callback(void *data)
+{
+	struct rcar_i2c_priv *priv = data;
+
+	priv->pos += sg_dma_len(&priv->sg);
+
+	rcar_i2c_dma_unmap(priv);
+}
+
+/* Set up DMA for the current message; returns silently to stay on PIO */
+static void rcar_i2c_dma(struct rcar_i2c_priv *priv)
+{
+	struct device *dev = rcar_i2c_priv_to_dev(priv);
+	struct i2c_msg *msg = priv->msg;
+	bool read = msg->flags & I2C_M_RD;
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	struct dma_chan *chan = read ? priv->dma_rx : priv->dma_tx;
+	struct dma_async_tx_descriptor *txdesc;
+	dma_addr_t dma_addr;
+	dma_cookie_t cookie;
+	unsigned char *buf;
+	int len;
+
+	/* Do not use DMA if it's not available or for messages < 8 bytes */
+	if (IS_ERR(chan) || msg->len < 8)
+		return;
+
+	if (read) {
+		/*
+		 * The last two bytes need to be fetched using PIO in
+		 * order for the STOP phase to work.
+		 */
+		buf = priv->msg->buf;
+		len = priv->msg->len - 2;
+	} else {
+		/* First byte in message was sent using PIO. */
+		buf = priv->msg->buf + 1;
+		len = priv->msg->len - 1;
+	}
+
+	/* the error check must use the same device the buffer was mapped for */
+	dma_addr = dma_map_single(chan->device->dev, buf, len, dir);
+	if (dma_mapping_error(chan->device->dev, dma_addr)) {
+		dev_dbg(dev, "dma map failed, using PIO\n");
+		return;
+	}
+
+	sg_dma_len(&priv->sg) = len;
+	sg_dma_address(&priv->sg) = dma_addr;
+
+	priv->dma_direction = dir;
+
+	txdesc = dmaengine_prep_slave_sg(chan, &priv->sg, 1,
+					 read ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!txdesc) {
+		dev_dbg(dev, "dma prep slave sg failed, using PIO\n");
+		rcar_i2c_cleanup_dma(priv);
+		return;
+	}
+
+	txdesc->callback = rcar_i2c_dma_callback;
+	txdesc->callback_param = priv;
+
+	cookie = dmaengine_submit(txdesc);
+	if (dma_submit_error(cookie)) {
+		dev_dbg(dev, "submitting dma failed, using PIO\n");
+		rcar_i2c_cleanup_dma(priv);
+		return;
+	}
+
+	/* Set delay for DMA operations */
+	rcar_i2c_write(priv, ICFBSCR, TCYC17);
+
+	/* Enable DMA Master Received/Transmitted */
+	if (read)
+		rcar_i2c_write(priv, ICDMAER, RMDMAE);
+	else
+		rcar_i2c_write(priv, ICDMAER, TMDMAE);
+
+	dma_async_issue_pending(chan);
+}
+
 static void rcar_i2c_irq_send(struct rcar_i2c_priv *priv, u32 msr)
 {
 	struct i2c_msg *msg = priv->msg;
@@ -306,6 +438,12 @@
 		rcar_i2c_write(priv, ICRXTX, msg->buf[priv->pos]);
 		priv->pos++;
 
+		/*
+		 * Try to use DMA to transmit the rest of the data if
+		 * address transfer phase just finished.
+		 */
+		if (msr & MAT)
+			rcar_i2c_dma(priv);
 	} else {
 		/*
 		 * The last data was pushed to ICRXTX on _PREV_ empty irq.
@@ -340,7 +478,11 @@
 		return;
 
 	if (msr & MAT) {
-		/* Address transfer phase finished, but no data at this point. */
+		/*
+		 * Address transfer phase finished, but no data at this point.
+		 * Try to use DMA to receive data.
+		 */
+		rcar_i2c_dma(priv);
 	} else if (priv->pos < msg->len) {
 		/* get received data */
 		msg->buf[priv->pos] = rcar_i2c_read(priv, ICRXTX);
@@ -472,6 +614,81 @@
 	return IRQ_HANDLED;
 }
 
+static struct dma_chan *rcar_i2c_request_dma_chan(struct device *dev,
+					enum dma_transfer_direction dir,
+					dma_addr_t port_addr)
+{
+	struct dma_chan *chan;
+	struct dma_slave_config cfg;
+	char *chan_name = dir == DMA_MEM_TO_DEV ? "tx" : "rx";
+	int ret;
+
+	chan = dma_request_slave_channel_reason(dev, chan_name);
+	if (IS_ERR(chan)) {
+		ret = PTR_ERR(chan);
+		dev_dbg(dev, "request_channel failed for %s (%d)\n",
+			chan_name, ret);
+		return chan;
+	}
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.direction = dir;
+	if (dir == DMA_MEM_TO_DEV) {
+		cfg.dst_addr = port_addr;
+		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	} else {
+		cfg.src_addr = port_addr;
+		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	}
+
+	ret = dmaengine_slave_config(chan, &cfg);
+	if (ret) {
+		dev_dbg(dev, "slave_config failed for %s (%d)\n",
+			chan_name, ret);
+		dma_release_channel(chan);
+		return ERR_PTR(ret);
+	}
+
+	dev_dbg(dev, "got DMA channel for %s\n", chan_name);
+	return chan;
+}
+
+static void rcar_i2c_request_dma(struct rcar_i2c_priv *priv,
+				 struct i2c_msg *msg)
+{
+	struct device *dev = rcar_i2c_priv_to_dev(priv);
+	bool read;
+	struct dma_chan *chan;
+	enum dma_transfer_direction dir;
+
+	read = msg->flags & I2C_M_RD;
+
+	chan = read ? priv->dma_rx : priv->dma_tx;
+	if (PTR_ERR(chan) != -EPROBE_DEFER)
+		return;
+
+	dir = read ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+	chan = rcar_i2c_request_dma_chan(dev, dir, priv->res->start + ICRXTX);
+
+	if (read)
+		priv->dma_rx = chan;
+	else
+		priv->dma_tx = chan;
+}
+
+static void rcar_i2c_release_dma(struct rcar_i2c_priv *priv)
+{
+	if (!IS_ERR(priv->dma_tx)) {
+		dma_release_channel(priv->dma_tx);
+		priv->dma_tx = ERR_PTR(-EPROBE_DEFER);
+	}
+
+	if (!IS_ERR(priv->dma_rx)) {
+		dma_release_channel(priv->dma_rx);
+		priv->dma_rx = ERR_PTR(-EPROBE_DEFER);
+	}
+}
+
 static int rcar_i2c_master_xfer(struct i2c_adapter *adap,
 				struct i2c_msg *msgs,
 				int num)
@@ -493,6 +710,7 @@
 			ret = -EOPNOTSUPP;
 			goto out;
 		}
+		rcar_i2c_request_dma(priv, msgs + i);
 	}
 
 	/* init first message */
@@ -504,6 +722,7 @@
 	time_left = wait_event_timeout(priv->wait, priv->flags & ID_DONE,
 				     num * adap->timeout);
 	if (!time_left) {
+		rcar_i2c_cleanup_dma(priv);
 		rcar_i2c_init(priv);
 		ret = -ETIMEDOUT;
 	} else if (priv->flags & ID_NACK) {
@@ -591,7 +810,6 @@
 {
 	struct rcar_i2c_priv *priv;
 	struct i2c_adapter *adap;
-	struct resource *res;
 	struct device *dev = &pdev->dev;
 	struct i2c_timings i2c_t;
 	int irq, ret;
@@ -606,8 +824,9 @@
 		return PTR_ERR(priv->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->io = devm_ioremap_resource(dev, res);
+	priv->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	priv->io = devm_ioremap_resource(dev, priv->res);
 	if (IS_ERR(priv->io))
 		return PTR_ERR(priv->io);
 
@@ -626,6 +845,11 @@
 
 	i2c_parse_fw_timings(dev, &i2c_t, false);
 
+	/* Init DMA */
+	sg_init_table(&priv->sg, 1);
+	priv->dma_direction = DMA_NONE;
+	priv->dma_rx = priv->dma_tx = ERR_PTR(-EPROBE_DEFER);
+
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
 	ret = rcar_i2c_clock_calculate(priv, &i2c_t);
@@ -673,6 +897,7 @@
 	struct device *dev = &pdev->dev;
 
 	i2c_del_adapter(&priv->adap);
+	rcar_i2c_release_dma(priv);
 	if (priv->flags & ID_P_PM_BLOCKED)
 		pm_runtime_put(dev);
 	pm_runtime_disable(dev);
diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c
index 3dcc5f3..80bed02 100644
--- a/drivers/i2c/busses/i2c-rk3x.c
+++ b/drivers/i2c/busses/i2c-rk3x.c
@@ -101,10 +101,7 @@
 	struct notifier_block clk_rate_nb;
 
 	/* Settings */
-	unsigned int scl_frequency;
-	unsigned int scl_rise_ns;
-	unsigned int scl_fall_ns;
-	unsigned int sda_fall_ns;
+	struct i2c_timings t;
 
 	/* Synchronization & notification */
 	spinlock_t lock;
@@ -437,10 +434,7 @@
  * Calculate divider values for desired SCL frequency
  *
  * @clk_rate: I2C input clock rate
- * @scl_rate: Desired SCL rate
- * @scl_rise_ns: How many ns it takes for SCL to rise.
- * @scl_fall_ns: How many ns it takes for SCL to fall.
- * @sda_fall_ns: How many ns it takes for SDA to fall.
+ * @t: Known I2C timing information.
  * @div_low: Divider output for low
  * @div_high: Divider output for high
  *
@@ -448,11 +442,10 @@
  * a best-effort divider value is returned in divs. If the target rate is
  * too high, we silently use the highest possible rate.
  */
-static int rk3x_i2c_calc_divs(unsigned long clk_rate, unsigned long scl_rate,
-			      unsigned long scl_rise_ns,
-			      unsigned long scl_fall_ns,
-			      unsigned long sda_fall_ns,
-			      unsigned long *div_low, unsigned long *div_high)
+static int rk3x_i2c_calc_divs(unsigned long clk_rate,
+			      struct i2c_timings *t,
+			      unsigned long *div_low,
+			      unsigned long *div_high)
 {
 	unsigned long spec_min_low_ns, spec_min_high_ns;
 	unsigned long spec_setup_start, spec_max_data_hold_ns;
@@ -472,12 +465,12 @@
 	int ret = 0;
 
 	/* Only support standard-mode and fast-mode */
-	if (WARN_ON(scl_rate > 400000))
-		scl_rate = 400000;
+	if (WARN_ON(t->bus_freq_hz > 400000))
+		t->bus_freq_hz = 400000;
 
 	/* prevent scl_rate_khz from becoming 0 */
-	if (WARN_ON(scl_rate < 1000))
-		scl_rate = 1000;
+	if (WARN_ON(t->bus_freq_hz < 1000))
+		t->bus_freq_hz = 1000;
 
 	/*
 	 * min_low_ns:  The minimum number of ns we need to hold low to
@@ -491,7 +484,7 @@
 	 *	 This is because the i2c host on Rockchip holds the data line
 	 *	 for half the low time.
 	 */
-	if (scl_rate <= 100000) {
+	if (t->bus_freq_hz <= 100000) {
 		/* Standard-mode */
 		spec_min_low_ns = 4700;
 		spec_setup_start = 4700;
@@ -506,7 +499,7 @@
 		spec_max_data_hold_ns = 900;
 		data_hold_buffer_ns = 50;
 	}
-	min_high_ns = scl_rise_ns + spec_min_high_ns;
+	min_high_ns = t->scl_rise_ns + spec_min_high_ns;
 
 	/*
 	 * Timings for repeated start:
@@ -517,18 +510,18 @@
 	 * we meet tSU;STA and tHD;STA times.
 	 */
 	min_high_ns = max(min_high_ns,
-		DIV_ROUND_UP((scl_rise_ns + spec_setup_start) * 1000, 875));
+		DIV_ROUND_UP((t->scl_rise_ns + spec_setup_start) * 1000, 875));
 	min_high_ns = max(min_high_ns,
-		DIV_ROUND_UP((scl_rise_ns + spec_setup_start +
-			      sda_fall_ns + spec_min_high_ns), 2));
+		DIV_ROUND_UP((t->scl_rise_ns + spec_setup_start +
+			      t->sda_fall_ns + spec_min_high_ns), 2));
 
-	min_low_ns = scl_fall_ns + spec_min_low_ns;
+	min_low_ns = t->scl_fall_ns + spec_min_low_ns;
 	max_low_ns = spec_max_data_hold_ns * 2 - data_hold_buffer_ns;
 	min_total_ns = min_low_ns + min_high_ns;
 
 	/* Adjust to avoid overflow */
 	clk_rate_khz = DIV_ROUND_UP(clk_rate, 1000);
-	scl_rate_khz = scl_rate / 1000;
+	scl_rate_khz = t->bus_freq_hz / 1000;
 
 	/*
 	 * We need the total div to be >= this number
@@ -616,14 +609,13 @@
 
 static void rk3x_i2c_adapt_div(struct rk3x_i2c *i2c, unsigned long clk_rate)
 {
+	struct i2c_timings *t = &i2c->t;
 	unsigned long div_low, div_high;
 	u64 t_low_ns, t_high_ns;
 	int ret;
 
-	ret = rk3x_i2c_calc_divs(clk_rate, i2c->scl_frequency, i2c->scl_rise_ns,
-				 i2c->scl_fall_ns, i2c->sda_fall_ns,
-				 &div_low, &div_high);
-	WARN_ONCE(ret != 0, "Could not reach SCL freq %u", i2c->scl_frequency);
+	ret = rk3x_i2c_calc_divs(clk_rate, t, &div_low, &div_high);
+	WARN_ONCE(ret != 0, "Could not reach SCL freq %u", t->bus_freq_hz);
 
 	clk_enable(i2c->clk);
 	i2c_writel(i2c, (div_high << 16) | (div_low & 0xffff), REG_CLKDIV);
@@ -634,7 +626,7 @@
 	dev_dbg(i2c->dev,
 		"CLK %lukhz, Req %uns, Act low %lluns high %lluns\n",
 		clk_rate / 1000,
-		1000000000 / i2c->scl_frequency,
+		1000000000 / t->bus_freq_hz,
 		t_low_ns, t_high_ns);
 }
 
@@ -664,9 +656,7 @@
 
 	switch (event) {
 	case PRE_RATE_CHANGE:
-		if (rk3x_i2c_calc_divs(ndata->new_rate, i2c->scl_frequency,
-				       i2c->scl_rise_ns, i2c->scl_fall_ns,
-				       i2c->sda_fall_ns,
+		if (rk3x_i2c_calc_divs(ndata->new_rate, &i2c->t,
 				       &div_low, &div_high) != 0)
 			return NOTIFY_STOP;
 
@@ -880,37 +870,8 @@
 	match = of_match_node(rk3x_i2c_match, np);
 	i2c->soc_data = (struct rk3x_i2c_soc_data *)match->data;
 
-	if (of_property_read_u32(pdev->dev.of_node, "clock-frequency",
-				 &i2c->scl_frequency)) {
-		dev_info(&pdev->dev, "using default SCL frequency: %d\n",
-			 DEFAULT_SCL_RATE);
-		i2c->scl_frequency = DEFAULT_SCL_RATE;
-	}
-
-	if (i2c->scl_frequency == 0 || i2c->scl_frequency > 400 * 1000) {
-		dev_warn(&pdev->dev, "invalid SCL frequency specified.\n");
-		dev_warn(&pdev->dev, "using default SCL frequency: %d\n",
-			 DEFAULT_SCL_RATE);
-		i2c->scl_frequency = DEFAULT_SCL_RATE;
-	}
-
-	/*
-	 * Read rise and fall time from device tree. If not available use
-	 * the default maximum timing from the specification.
-	 */
-	if (of_property_read_u32(pdev->dev.of_node, "i2c-scl-rising-time-ns",
-				 &i2c->scl_rise_ns)) {
-		if (i2c->scl_frequency <= 100000)
-			i2c->scl_rise_ns = 1000;
-		else
-			i2c->scl_rise_ns = 300;
-	}
-	if (of_property_read_u32(pdev->dev.of_node, "i2c-scl-falling-time-ns",
-				 &i2c->scl_fall_ns))
-		i2c->scl_fall_ns = 300;
-	if (of_property_read_u32(pdev->dev.of_node, "i2c-sda-falling-time-ns",
-				 &i2c->sda_fall_ns))
-		i2c->sda_fall_ns = i2c->scl_fall_ns;
+	/* use common interface to get I2C timing properties */
+	i2c_parse_fw_timings(&pdev->dev, &i2c->t, true);
 
 	strlcpy(i2c->adap.name, "rk3x-i2c", sizeof(i2c->adap.name));
 	i2c->adap.owner = THIS_MODULE;
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 362a6de..38dc1ca 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -163,15 +163,14 @@
 MODULE_DEVICE_TABLE(of, s3c24xx_i2c_match);
 #endif
 
-/* s3c24xx_get_device_quirks
- *
+/*
  * Get controller type either from device tree or platform device variant.
-*/
-
+ */
 static inline kernel_ulong_t s3c24xx_get_device_quirks(struct platform_device *pdev)
 {
 	if (pdev->dev.of_node) {
 		const struct of_device_id *match;
+
 		match = of_match_node(s3c24xx_i2c_match, pdev->dev.of_node);
 		return (kernel_ulong_t)match->data;
 	}
@@ -179,12 +178,10 @@
 	return platform_get_device_id(pdev)->driver_data;
 }
 
-/* s3c24xx_i2c_master_complete
- *
- * complete the message and wake up the caller, using the given return code,
+/*
+ * Complete the message and wake up the caller, using the given return code,
  * or zero to mean ok.
-*/
-
+ */
 static inline void s3c24xx_i2c_master_complete(struct s3c24xx_i2c *i2c, int ret)
 {
 	dev_dbg(i2c->dev, "master_complete %d\n", ret);
@@ -217,7 +214,6 @@
 }
 
 /* irq enable/disable functions */
-
 static inline void s3c24xx_i2c_disable_irq(struct s3c24xx_i2c *i2c)
 {
 	unsigned long tmp;
@@ -251,11 +247,9 @@
 	return false;
 }
 
-/* s3c24xx_i2c_message_start
- *
+/*
  * put the start of a message onto the bus
-*/
-
+ */
 static void s3c24xx_i2c_message_start(struct s3c24xx_i2c *i2c,
 				      struct i2c_msg *msg)
 {
@@ -284,9 +278,10 @@
 	dev_dbg(i2c->dev, "START: %08lx to IICSTAT, %02x to DS\n", stat, addr);
 	writeb(addr, i2c->regs + S3C2410_IICDS);
 
-	/* delay here to ensure the data byte has gotten onto the bus
-	 * before the transaction is started */
-
+	/*
+	 * delay here to ensure the data byte has gotten onto the bus
+	 * before the transaction is started
+	 */
 	ndelay(i2c->tx_setup);
 
 	dev_dbg(i2c->dev, "iiccon, %08lx\n", iiccon);
@@ -361,50 +356,46 @@
 	s3c24xx_i2c_disable_irq(i2c);
 }
 
-/* helper functions to determine the current state in the set of
- * messages we are sending */
+/*
+ * helper functions to determine the current state in the set of
+ * messages we are sending
+ */
 
-/* is_lastmsg()
- *
+/*
  * returns TRUE if the current message is the last in the set
-*/
-
+ */
 static inline int is_lastmsg(struct s3c24xx_i2c *i2c)
 {
 	return i2c->msg_idx >= (i2c->msg_num - 1);
 }
 
-/* is_msglast
- *
+/*
  * returns TRUE if we this is the last byte in the current message
-*/
-
+ */
 static inline int is_msglast(struct s3c24xx_i2c *i2c)
 {
-	/* msg->len is always 1 for the first byte of smbus block read.
+	/*
+	 * msg->len is always 1 for the first byte of smbus block read.
 	 * Actual length will be read from slave. More bytes will be
-	 * read according to the length then. */
+	 * read according to the length then.
+	 */
 	if (i2c->msg->flags & I2C_M_RECV_LEN && i2c->msg->len == 1)
 		return 0;
 
 	return i2c->msg_ptr == i2c->msg->len-1;
 }
 
-/* is_msgend
- *
+/*
  * returns TRUE if we reached the end of the current message
-*/
-
+ */
 static inline int is_msgend(struct s3c24xx_i2c *i2c)
 {
 	return i2c->msg_ptr >= i2c->msg->len;
 }
 
-/* i2c_s3c_irq_nextbyte
- *
+/*
  * process an interrupt and work out what to do
  */
-
 static int i2c_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 {
 	unsigned long tmp;
@@ -423,14 +414,13 @@
 		goto out_ack;
 
 	case STATE_START:
-		/* last thing we did was send a start condition on the
+		/*
+		 * last thing we did was send a start condition on the
 		 * bus, or started a new i2c message
 		 */
-
 		if (iicstat & S3C2410_IICSTAT_LASTBIT &&
 		    !(i2c->msg->flags & I2C_M_IGNORE_NAK)) {
 			/* ack was not received... */
-
 			dev_dbg(i2c->dev, "ack was not received\n");
 			s3c24xx_i2c_stop(i2c, -ENXIO);
 			goto out_ack;
@@ -441,9 +431,10 @@
 		else
 			i2c->state = STATE_WRITE;
 
-		/* terminate the transfer if there is nothing to do
-		 * as this is used by the i2c probe to find devices. */
-
+		/*
+		 * Terminate the transfer if there is nothing to do
+		 * as this is used by the i2c probe to find devices.
+		 */
 		if (is_lastmsg(i2c) && i2c->msg->len == 0) {
 			s3c24xx_i2c_stop(i2c, 0);
 			goto out_ack;
@@ -452,14 +443,16 @@
 		if (i2c->state == STATE_READ)
 			goto prepare_read;
 
-		/* fall through to the write state, as we will need to
-		 * send a byte as well */
-
-	case STATE_WRITE:
-		/* we are writing data to the device... check for the
-		 * end of the message, and if so, work out what to do
+		/*
+		 * fall through to the write state, as we will need to
+		 * send a byte as well
 		 */
 
+	case STATE_WRITE:
+		/*
+		 * we are writing data to the device... check for the
+		 * end of the message, and if so, work out what to do
+		 */
 		if (!(i2c->msg->flags & I2C_M_IGNORE_NAK)) {
 			if (iicstat & S3C2410_IICSTAT_LASTBIT) {
 				dev_dbg(i2c->dev, "WRITE: No Ack\n");
@@ -475,12 +468,13 @@
 			byte = i2c->msg->buf[i2c->msg_ptr++];
 			writeb(byte, i2c->regs + S3C2410_IICDS);
 
-			/* delay after writing the byte to allow the
+			/*
+			 * delay after writing the byte to allow the
 			 * data setup time on the bus, as writing the
 			 * data to the register causes the first bit
 			 * to appear on SDA, and SCL will change as
-			 * soon as the interrupt is acknowledged */
-
+			 * soon as the interrupt is acknowledged
+			 */
 			ndelay(i2c->tx_setup);
 
 		} else if (!is_lastmsg(i2c)) {
@@ -496,10 +490,11 @@
 			if (i2c->msg->flags & I2C_M_NOSTART) {
 
 				if (i2c->msg->flags & I2C_M_RD) {
-					/* cannot do this, the controller
+					/*
+					 * cannot do this, the controller
 					 * forces us to send a new START
-					 * when we change direction */
-
+					 * when we change direction
+					 */
 					s3c24xx_i2c_stop(i2c, -EINVAL);
 				}
 
@@ -512,17 +507,16 @@
 
 		} else {
 			/* send stop */
-
 			s3c24xx_i2c_stop(i2c, 0);
 		}
 		break;
 
 	case STATE_READ:
-		/* we have a byte of data in the data register, do
+		/*
+		 * we have a byte of data in the data register, do
 		 * something with it, and then work out whether we are
 		 * going to do any more read/write
 		 */
-
 		byte = readb(i2c->regs + S3C2410_IICDS);
 		i2c->msg->buf[i2c->msg_ptr++] = byte;
 
@@ -537,9 +531,10 @@
 				s3c24xx_i2c_disable_ack(i2c);
 
 		} else if (is_msgend(i2c)) {
-			/* ok, we've read the entire buffer, see if there
-			 * is anything else we need to do */
-
+			/*
+			 * ok, we've read the entire buffer, see if there
+			 * is anything else we need to do
+			 */
 			if (is_lastmsg(i2c)) {
 				/* last message, send stop and complete */
 				dev_dbg(i2c->dev, "READ: Send Stop\n");
@@ -568,11 +563,9 @@
 	return ret;
 }
 
-/* s3c24xx_i2c_irq
- *
+/*
  * top level IRQ servicing routine
-*/
-
+ */
 static irqreturn_t s3c24xx_i2c_irq(int irqno, void *dev_id)
 {
 	struct s3c24xx_i2c *i2c = dev_id;
@@ -595,9 +588,10 @@
 		goto out;
 	}
 
-	/* pretty much this leaves us with the fact that we've
-	 * transmitted or received whatever byte we last sent */
-
+	/*
+	 * pretty much this leaves us with the fact that we've
+	 * transmitted or received whatever byte we last sent
+	 */
 	i2c_s3c_irq_nextbyte(i2c, status);
 
  out:
@@ -630,11 +624,9 @@
 }
 
 
-/* s3c24xx_i2c_set_master
- *
+/*
  * get the i2c bus for a master transaction
-*/
-
+ */
 static int s3c24xx_i2c_set_master(struct s3c24xx_i2c *i2c)
 {
 	unsigned long iicstat;
@@ -652,11 +644,9 @@
 	return -ETIMEDOUT;
 }
 
-/* s3c24xx_i2c_wait_idle
- *
+/*
  * wait for the i2c bus to become idle.
-*/
-
+ */
 static void s3c24xx_i2c_wait_idle(struct s3c24xx_i2c *i2c)
 {
 	unsigned long iicstat;
@@ -706,11 +696,9 @@
 		dev_warn(i2c->dev, "timeout waiting for bus idle\n");
 }
 
-/* s3c24xx_i2c_doxfer
- *
+/*
  * this starts an i2c transfer
-*/
-
+ */
 static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c,
 			      struct i2c_msg *msgs, int num)
 {
@@ -749,9 +737,10 @@
 
 	ret = i2c->msg_idx;
 
-	/* having these next two as dev_err() makes life very
-	 * noisy when doing an i2cdetect */
-
+	/*
+	 * Having these next two as dev_err() makes life very
+	 * noisy when doing an i2cdetect
+	 */
 	if (timeout == 0)
 		dev_dbg(i2c->dev, "timeout\n");
 	else if (ret != num)
@@ -771,12 +760,10 @@
 	return ret;
 }
 
-/* s3c24xx_i2c_xfer
- *
+/*
  * first port of call from the i2c bus code when an message needs
  * transferring across the i2c bus.
-*/
-
+ */
 static int s3c24xx_i2c_xfer(struct i2c_adapter *adap,
 			struct i2c_msg *msgs, int num)
 {
@@ -814,17 +801,14 @@
 }
 
 /* i2c bus registration info */
-
 static const struct i2c_algorithm s3c24xx_i2c_algorithm = {
 	.master_xfer		= s3c24xx_i2c_xfer,
 	.functionality		= s3c24xx_i2c_func,
 };
 
-/* s3c24xx_i2c_calcdivisor
- *
+/*
  * return the divisor settings for a given frequency
-*/
-
+ */
 static int s3c24xx_i2c_calcdivisor(unsigned long clkin, unsigned int wanted,
 				   unsigned int *div1, unsigned int *divs)
 {
@@ -850,13 +834,11 @@
 	return clkin / (calc_divs * calc_div1);
 }
 
-/* s3c24xx_i2c_clockrate
- *
+/*
  * work out a divisor for the user requested frequency setting,
  * either by the requested frequency, or scanning the acceptable
  * range of frequencies until something is found
-*/
-
+ */
 static int s3c24xx_i2c_clockrate(struct s3c24xx_i2c *i2c, unsigned int *got)
 {
 	struct s3c2410_platform_i2c *pdata = i2c->pdata;
@@ -944,7 +926,7 @@
 		i2c_unlock_adapter(&i2c->adap);
 
 		if (ret < 0)
-			dev_err(i2c->dev, "cannot find frequency\n");
+			dev_err(i2c->dev, "cannot find frequency (%d)\n", ret);
 		else
 			dev_info(i2c->dev, "setting freq %d\n", got);
 	}
@@ -995,7 +977,8 @@
 
 		ret = gpio_request(gpio, "i2c-bus");
 		if (ret) {
-			dev_err(i2c->dev, "gpio [%d] request failed\n", gpio);
+			dev_err(i2c->dev, "gpio [%d] request failed (%d)\n",
+				gpio, ret);
 			goto free_gpio;
 		}
 	}
@@ -1028,11 +1011,9 @@
 }
 #endif
 
-/* s3c24xx_i2c_init
- *
+/*
  * initialise the controller, set the IO lines and frequency
-*/
-
+ */
 static int s3c24xx_i2c_init(struct s3c24xx_i2c *i2c)
 {
 	struct s3c2410_platform_i2c *pdata;
@@ -1068,11 +1049,9 @@
 }
 
 #ifdef CONFIG_OF
-/* s3c24xx_i2c_parse_dt
- *
+/*
  * Parse the device tree node and retreive the platform data.
-*/
-
+ */
 static void
 s3c24xx_i2c_parse_dt(struct device_node *np, struct s3c24xx_i2c *i2c)
 {
@@ -1105,17 +1084,9 @@
 }
 #else
 static void
-s3c24xx_i2c_parse_dt(struct device_node *np, struct s3c24xx_i2c *i2c)
-{
-	return;
-}
+s3c24xx_i2c_parse_dt(struct device_node *np, struct s3c24xx_i2c *i2c) { }
 #endif
 
-/* s3c24xx_i2c_probe
- *
- * called by the bus driver when a suitable device is found
-*/
-
 static int s3c24xx_i2c_probe(struct platform_device *pdev)
 {
 	struct s3c24xx_i2c *i2c;
@@ -1156,7 +1127,6 @@
 	init_waitqueue_head(&i2c->wait);
 
 	/* find the clock and enable it */
-
 	i2c->dev = &pdev->dev;
 	i2c->clk = devm_clk_get(&pdev->dev, "i2c");
 	if (IS_ERR(i2c->clk)) {
@@ -1166,9 +1136,7 @@
 
 	dev_dbg(&pdev->dev, "clock source %p\n", i2c->clk);
 
-
 	/* map the registers */
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	i2c->regs = devm_ioremap_resource(&pdev->dev, res);
 
@@ -1179,33 +1147,35 @@
 		i2c->regs, res);
 
 	/* setup info block for the i2c core */
-
 	i2c->adap.algo_data = i2c;
 	i2c->adap.dev.parent = &pdev->dev;
-
 	i2c->pctrl = devm_pinctrl_get_select_default(i2c->dev);
 
 	/* inititalise the i2c gpio lines */
-
-	if (i2c->pdata->cfg_gpio) {
+	if (i2c->pdata->cfg_gpio)
 		i2c->pdata->cfg_gpio(to_platform_device(i2c->dev));
-	} else if (IS_ERR(i2c->pctrl) && s3c24xx_i2c_parse_dt_gpio(i2c)) {
+	else if (IS_ERR(i2c->pctrl) && s3c24xx_i2c_parse_dt_gpio(i2c))
 		return -EINVAL;
-	}
 
 	/* initialise the i2c controller */
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "I2C clock enable failed\n");
+		return ret;
+	}
 
-	clk_prepare_enable(i2c->clk);
 	ret = s3c24xx_i2c_init(i2c);
 	clk_disable(i2c->clk);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "I2C controller init failed\n");
+		clk_unprepare(i2c->clk);
 		return ret;
 	}
-	/* find the IRQ for this unit (note, this relies on the init call to
+
+	/*
+	 * find the IRQ for this unit (note, this relies on the init call to
 	 * ensure no current IRQs pending
 	 */
-
 	if (!(i2c->quirks & QUIRK_POLL)) {
 		i2c->irq = ret = platform_get_irq(pdev, 0);
 		if (ret <= 0) {
@@ -1214,9 +1184,8 @@
 			return ret;
 		}
 
-	ret = devm_request_irq(&pdev->dev, i2c->irq, s3c24xx_i2c_irq, 0,
-				dev_name(&pdev->dev), i2c);
-
+		ret = devm_request_irq(&pdev->dev, i2c->irq, s3c24xx_i2c_irq,
+				       0, dev_name(&pdev->dev), i2c);
 		if (ret != 0) {
 			dev_err(&pdev->dev, "cannot claim IRQ %d\n", i2c->irq);
 			clk_unprepare(i2c->clk);
@@ -1231,12 +1200,12 @@
 		return ret;
 	}
 
-	/* Note, previous versions of the driver used i2c_add_adapter()
+	/*
+	 * Note, previous versions of the driver used i2c_add_adapter()
 	 * to add the bus at any number. We now pass the bus number via
 	 * the platform data, so if unset it will now default to always
 	 * being bus 0.
 	 */
-
 	i2c->adap.nr = i2c->pdata->bus_num;
 	i2c->adap.dev.of_node = pdev->dev.of_node;
 
@@ -1257,11 +1226,6 @@
 	return 0;
 }
 
-/* s3c24xx_i2c_remove
- *
- * called when device is removed from the bus
-*/
-
 static int s3c24xx_i2c_remove(struct platform_device *pdev)
 {
 	struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev);
@@ -1316,14 +1280,8 @@
 
 #ifdef CONFIG_PM
 static const struct dev_pm_ops s3c24xx_i2c_dev_pm_ops = {
-#ifdef CONFIG_PM_SLEEP
-	.suspend_noirq = s3c24xx_i2c_suspend_noirq,
-	.resume_noirq = s3c24xx_i2c_resume_noirq,
-	.freeze_noirq = s3c24xx_i2c_suspend_noirq,
-	.thaw_noirq = s3c24xx_i2c_resume_noirq,
-	.poweroff_noirq = s3c24xx_i2c_suspend_noirq,
-	.restore_noirq = s3c24xx_i2c_resume_noirq,
-#endif
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(s3c24xx_i2c_suspend_noirq,
+				      s3c24xx_i2c_resume_noirq)
 };
 
 #define S3C24XX_DEV_PM_OPS (&s3c24xx_i2c_dev_pm_ops)
@@ -1331,8 +1289,6 @@
 #define S3C24XX_DEV_PM_OPS NULL
 #endif
 
-/* device driver for platform bus bits */
-
 static struct platform_driver s3c24xx_i2c_driver = {
 	.probe		= s3c24xx_i2c_probe,
 	.remove		= s3c24xx_i2c_remove,
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 7d2bd3e..6fb3e26 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -398,8 +398,7 @@
 {
 	switch (pd->pos) {
 	case -1:
-		*buf = (pd->msg->addr & 0x7f) << 1;
-		*buf |= (pd->msg->flags & I2C_M_RD) ? 1 : 0;
+		*buf = i2c_8bit_addr_from_msg(pd->msg);
 		break;
 	default:
 		*buf = pd->msg->buf[pd->pos];
diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c
index 13e51ef..792a42b 100644
--- a/drivers/i2c/busses/i2c-sirf.c
+++ b/drivers/i2c/busses/i2c-sirf.c
@@ -190,9 +190,7 @@
 
 	writel(regval, siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++));
 
-	addr = msg->addr << 1;	/* Generate address */
-	if (msg->flags & I2C_M_RD)
-		addr |= 1;
+	addr = i2c_8bit_addr_from_msg(msg);
 
 	/* Reverse direction bit */
 	if (msg->flags & I2C_M_REV_DIR_ADDR)
diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c
index 6ee7715..944ec420 100644
--- a/drivers/i2c/busses/i2c-st.c
+++ b/drivers/i2c/busses/i2c-st.c
@@ -337,10 +337,42 @@
 	writel_relaxed(val, i2c_dev->base + SSC_NOISE_SUPP_WIDTH_DATAOUT);
 }
 
+static int st_i2c_recover_bus(struct i2c_adapter *i2c_adap)
+{
+	struct st_i2c_dev *i2c_dev = i2c_get_adapdata(i2c_adap);
+	u32 ctl;
+
+	dev_dbg(i2c_dev->dev, "Trying to recover bus\n");
+
+	/*
+	 * SSP IP is dual role SPI/I2C. To generate 9 clock pulses
+	 * we switch to SPI mode, 9 bit words and write a 0. This
+	 * has been validated with an oscilloscope and is easier
+	 * than switching to GPIO mode.
+	 */
+
+	/* Disable interrupts */
+	writel_relaxed(0, i2c_dev->base + SSC_IEN);
+
+	st_i2c_hw_config(i2c_dev);
+
+	ctl = SSC_CTL_EN | SSC_CTL_MS |	SSC_CTL_EN_RX_FIFO | SSC_CTL_EN_TX_FIFO;
+	st_i2c_set_bits(i2c_dev->base + SSC_CTL, ctl);
+
+	st_i2c_clr_bits(i2c_dev->base + SSC_I2C, SSC_I2C_I2CM);
+	usleep_range(8000, 10000);
+
+	writel_relaxed(0, i2c_dev->base + SSC_TBUF);
+	usleep_range(2000, 4000);
+	st_i2c_set_bits(i2c_dev->base + SSC_I2C, SSC_I2C_I2CM);
+
+	return 0;
+}
+
 static int st_i2c_wait_free_bus(struct st_i2c_dev *i2c_dev)
 {
 	u32 sta;
-	int i;
+	int i, ret;
 
 	for (i = 0; i < 10; i++) {
 		sta = readl_relaxed(i2c_dev->base + SSC_STA);
@@ -352,6 +384,12 @@
 
 	dev_err(i2c_dev->dev, "bus not free (status = 0x%08x)\n", sta);
 
+	ret = i2c_recover_bus(&i2c_dev->adap);
+	if (ret) {
+		dev_err(i2c_dev->dev, "Failed to recover the bus (%d)\n", ret);
+		return ret;
+	}
+
 	return -EBUSY;
 }
 
@@ -614,8 +652,7 @@
 	unsigned long timeout;
 	int ret;
 
-	c->addr		= (u8)(msg->addr << 1);
-	c->addr		|= (msg->flags & I2C_M_RD);
+	c->addr		= i2c_8bit_addr_from_msg(msg);
 	c->buf		= msg->buf;
 	c->count	= msg->len;
 	c->xfered	= 0;
@@ -744,6 +781,10 @@
 	.functionality = st_i2c_func,
 };
 
+static struct i2c_bus_recovery_info st_i2c_recovery_info = {
+	.recover_bus = st_i2c_recover_bus,
+};
+
 static int st_i2c_of_get_deglitch(struct device_node *np,
 		struct st_i2c_dev *i2c_dev)
 {
@@ -826,6 +867,7 @@
 	adap->timeout = 2 * HZ;
 	adap->retries = 0;
 	adap->algo = &st_i2c_algo;
+	adap->bus_recovery_info = &st_i2c_recovery_info;
 	adap->dev.parent = &pdev->dev;
 	adap->dev.of_node = pdev->dev.of_node;
 
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 929185a..445398c3 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -38,6 +38,7 @@
 #define I2C_CNFG_DEBOUNCE_CNT_SHIFT		12
 #define I2C_CNFG_PACKET_MODE_EN			(1<<10)
 #define I2C_CNFG_NEW_MASTER_FSM			(1<<11)
+#define I2C_CNFG_MULTI_MASTER_MODE		(1<<17)
 #define I2C_STATUS				0x01C
 #define I2C_SL_CNFG				0x020
 #define I2C_SL_CNFG_NACK			(1<<1)
@@ -106,6 +107,9 @@
 #define I2C_SLV_CONFIG_LOAD			(1 << 1)
 #define I2C_TIMEOUT_CONFIG_LOAD			(1 << 2)
 
+#define I2C_CLKEN_OVERRIDE			0x090
+#define I2C_MST_CORE_CLKEN_OVR			(1 << 0)
+
 /*
  * msg_end_type: The bus control which need to be send at end of transfer.
  * @MSG_END_STOP: Send stop pulse at end of transfer.
@@ -143,6 +147,8 @@
 	int clk_divisor_hs_mode;
 	int clk_divisor_std_fast_mode;
 	u16 clk_divisor_fast_plus_mode;
+	bool has_multi_master_mode;
+	bool has_slcg_override_reg;
 };
 
 /**
@@ -184,6 +190,7 @@
 	u32 bus_clk_rate;
 	u16 clk_divisor_non_hs_mode;
 	bool is_suspended;
+	bool is_multimaster_mode;
 };
 
 static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg)
@@ -438,6 +445,10 @@
 
 	val = I2C_CNFG_NEW_MASTER_FSM | I2C_CNFG_PACKET_MODE_EN |
 		(0x2 << I2C_CNFG_DEBOUNCE_CNT_SHIFT);
+
+	if (i2c_dev->hw->has_multi_master_mode)
+		val |= I2C_CNFG_MULTI_MASTER_MODE;
+
 	i2c_writel(i2c_dev, val, I2C_CNFG);
 	i2c_writel(i2c_dev, 0, I2C_INT_MASK);
 
@@ -463,25 +474,29 @@
 	if (tegra_i2c_flush_fifos(i2c_dev))
 		err = -ETIMEDOUT;
 
+	if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg)
+		i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE);
+
 	if (i2c_dev->hw->has_config_load_reg) {
 		i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD);
 		while (i2c_readl(i2c_dev, I2C_CONFIG_LOAD) != 0) {
 			if (time_after(jiffies, timeout)) {
 				dev_warn(i2c_dev->dev,
 					"timeout waiting for config load\n");
-				return -ETIMEDOUT;
+				err = -ETIMEDOUT;
+				goto err;
 			}
 			msleep(1);
 		}
 	}
 
-	tegra_i2c_clock_disable(i2c_dev);
-
 	if (i2c_dev->irq_disabled) {
 		i2c_dev->irq_disabled = 0;
 		enable_irq(i2c_dev->irq);
 	}
 
+err:
+	tegra_i2c_clock_disable(i2c_dev);
 	return err;
 }
 
@@ -688,6 +703,20 @@
 	return ret;
 }
 
+static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev)
+{
+	struct device_node *np = i2c_dev->dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "clock-frequency",
+			&i2c_dev->bus_clk_rate);
+	if (ret)
+		i2c_dev->bus_clk_rate = 100000; /* default clock rate */
+
+	i2c_dev->is_multimaster_mode = of_property_read_bool(np,
+			"multi-master");
+}
+
 static const struct i2c_algorithm tegra_i2c_algo = {
 	.master_xfer	= tegra_i2c_xfer,
 	.functionality	= tegra_i2c_func,
@@ -707,6 +736,8 @@
 	.clk_divisor_std_fast_mode = 0,
 	.clk_divisor_fast_plus_mode = 0,
 	.has_config_load_reg = false,
+	.has_multi_master_mode = false,
+	.has_slcg_override_reg = false,
 };
 
 static const struct tegra_i2c_hw_feature tegra30_i2c_hw = {
@@ -717,6 +748,8 @@
 	.clk_divisor_std_fast_mode = 0,
 	.clk_divisor_fast_plus_mode = 0,
 	.has_config_load_reg = false,
+	.has_multi_master_mode = false,
+	.has_slcg_override_reg = false,
 };
 
 static const struct tegra_i2c_hw_feature tegra114_i2c_hw = {
@@ -727,6 +760,8 @@
 	.clk_divisor_std_fast_mode = 0x19,
 	.clk_divisor_fast_plus_mode = 0x10,
 	.has_config_load_reg = false,
+	.has_multi_master_mode = false,
+	.has_slcg_override_reg = false,
 };
 
 static const struct tegra_i2c_hw_feature tegra124_i2c_hw = {
@@ -737,10 +772,25 @@
 	.clk_divisor_std_fast_mode = 0x19,
 	.clk_divisor_fast_plus_mode = 0x10,
 	.has_config_load_reg = true,
+	.has_multi_master_mode = false,
+	.has_slcg_override_reg = true,
+};
+
+static const struct tegra_i2c_hw_feature tegra210_i2c_hw = {
+	.has_continue_xfer_support = true,
+	.has_per_pkt_xfer_complete_irq = true,
+	.has_single_clk_source = true,
+	.clk_divisor_hs_mode = 1,
+	.clk_divisor_std_fast_mode = 0x19,
+	.clk_divisor_fast_plus_mode = 0x10,
+	.has_config_load_reg = true,
+	.has_multi_master_mode = true,
+	.has_slcg_override_reg = true,
 };
 
 /* Match table for of_platform binding */
 static const struct of_device_id tegra_i2c_of_match[] = {
+	{ .compatible = "nvidia,tegra210-i2c", .data = &tegra210_i2c_hw, },
 	{ .compatible = "nvidia,tegra124-i2c", .data = &tegra124_i2c_hw, },
 	{ .compatible = "nvidia,tegra114-i2c", .data = &tegra114_i2c_hw, },
 	{ .compatible = "nvidia,tegra30-i2c", .data = &tegra30_i2c_hw, },
@@ -797,10 +847,7 @@
 		return PTR_ERR(i2c_dev->rst);
 	}
 
-	ret = of_property_read_u32(i2c_dev->dev->of_node, "clock-frequency",
-					&i2c_dev->bus_clk_rate);
-	if (ret)
-		i2c_dev->bus_clk_rate = 100000; /* default clock rate */
+	tegra_i2c_parse_dt(i2c_dev);
 
 	i2c_dev->hw = &tegra20_i2c_hw;
 
@@ -853,6 +900,15 @@
 		goto unprepare_fast_clk;
 	}
 
+	if (i2c_dev->is_multimaster_mode) {
+		ret = clk_enable(i2c_dev->div_clk);
+		if (ret < 0) {
+			dev_err(i2c_dev->dev, "div_clk enable failed %d\n",
+				ret);
+			goto unprepare_div_clk;
+		}
+	}
+
 	ret = tegra_i2c_init(i2c_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to initialize i2c controller");
@@ -863,7 +919,7 @@
 			tegra_i2c_isr, 0, dev_name(&pdev->dev), i2c_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to request irq %i\n", i2c_dev->irq);
-		goto unprepare_div_clk;
+		goto disable_div_clk;
 	}
 
 	i2c_set_adapdata(&i2c_dev->adapter, i2c_dev);
@@ -878,11 +934,15 @@
 	ret = i2c_add_numbered_adapter(&i2c_dev->adapter);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to add I2C adapter\n");
-		goto unprepare_div_clk;
+		goto disable_div_clk;
 	}
 
 	return 0;
 
+disable_div_clk:
+	if (i2c_dev->is_multimaster_mode)
+		clk_disable(i2c_dev->div_clk);
+
 unprepare_div_clk:
 	clk_unprepare(i2c_dev->div_clk);
 
@@ -898,6 +958,9 @@
 	struct tegra_i2c_dev *i2c_dev = platform_get_drvdata(pdev);
 	i2c_del_adapter(&i2c_dev->adapter);
 
+	if (i2c_dev->is_multimaster_mode)
+		clk_disable(i2c_dev->div_clk);
+
 	clk_unprepare(i2c_dev->div_clk);
 	if (!i2c_dev->hw->has_single_clk_source)
 		clk_unprepare(i2c_dev->fast_clk);
diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index 213ba55..aeead0d 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c
@@ -524,7 +524,7 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(dev, "failed to get IRQ number");
+		dev_err(dev, "failed to get IRQ number\n");
 		return irq;
 	}
 
diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c
index 89eaa8a..475a5eb 100644
--- a/drivers/i2c/busses/i2c-uniphier.c
+++ b/drivers/i2c/busses/i2c-uniphier.c
@@ -381,7 +381,7 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(dev, "failed to get IRQ number");
+		dev_err(dev, "failed to get IRQ number\n");
 		return irq;
 	}
 
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index e584d88..af11b65 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -954,48 +954,40 @@
 }
 
 /**
- * i2c_lock_adapter - Get exclusive access to an I2C bus segment
+ * i2c_adapter_lock_bus - Get exclusive access to an I2C bus segment
  * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	locks only this branch in the adapter tree
  */
-void i2c_lock_adapter(struct i2c_adapter *adapter)
+static void i2c_adapter_lock_bus(struct i2c_adapter *adapter,
+				 unsigned int flags)
 {
-	struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
-
-	if (parent)
-		i2c_lock_adapter(parent);
-	else
-		rt_mutex_lock(&adapter->bus_lock);
-}
-EXPORT_SYMBOL_GPL(i2c_lock_adapter);
-
-/**
- * i2c_trylock_adapter - Try to get exclusive access to an I2C bus segment
- * @adapter: Target I2C bus segment
- */
-static int i2c_trylock_adapter(struct i2c_adapter *adapter)
-{
-	struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
-
-	if (parent)
-		return i2c_trylock_adapter(parent);
-	else
-		return rt_mutex_trylock(&adapter->bus_lock);
+	rt_mutex_lock(&adapter->bus_lock);
 }
 
 /**
- * i2c_unlock_adapter - Release exclusive access to an I2C bus segment
+ * i2c_adapter_trylock_bus - Try to get exclusive access to an I2C bus segment
  * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER trylocks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	trylocks only this branch in the adapter tree
  */
-void i2c_unlock_adapter(struct i2c_adapter *adapter)
+static int i2c_adapter_trylock_bus(struct i2c_adapter *adapter,
+				   unsigned int flags)
 {
-	struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
-
-	if (parent)
-		i2c_unlock_adapter(parent);
-	else
-		rt_mutex_unlock(&adapter->bus_lock);
+	return rt_mutex_trylock(&adapter->bus_lock);
 }
-EXPORT_SYMBOL_GPL(i2c_unlock_adapter);
+
+/**
+ * i2c_adapter_unlock_bus - Release exclusive access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	unlocks only this branch in the adapter tree
+ */
+static void i2c_adapter_unlock_bus(struct i2c_adapter *adapter,
+				   unsigned int flags)
+{
+	rt_mutex_unlock(&adapter->bus_lock);
+}
 
 static void i2c_dev_set_name(struct i2c_adapter *adap,
 			     struct i2c_client *client)
@@ -1541,7 +1533,14 @@
 		return -EINVAL;
 	}
 
+	if (!adap->lock_bus) {
+		adap->lock_bus = i2c_adapter_lock_bus;
+		adap->trylock_bus = i2c_adapter_trylock_bus;
+		adap->unlock_bus = i2c_adapter_unlock_bus;
+	}
+
 	rt_mutex_init(&adap->bus_lock);
+	rt_mutex_init(&adap->mux_lock);
 	mutex_init(&adap->userspace_clients_lock);
 	INIT_LIST_HEAD(&adap->userspace_clients);
 
@@ -1559,6 +1558,7 @@
 	dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name);
 
 	pm_runtime_no_callbacks(&adap->dev);
+	pm_suspend_ignore_children(&adap->dev, true);
 	pm_runtime_enable(&adap->dev);
 
 #ifdef CONFIG_I2C_COMPAT
@@ -1594,10 +1594,12 @@
 
 			bri->get_scl = get_scl_gpio_value;
 			bri->set_scl = set_scl_gpio_value;
-		} else if (!bri->set_scl || !bri->get_scl) {
+		} else if (bri->recover_bus == i2c_generic_scl_recovery) {
 			/* Generic SCL recovery */
-			dev_err(&adap->dev, "No {get|set}_gpio() found, not using recovery\n");
-			adap->bus_recovery_info = NULL;
+			if (!bri->set_scl || !bri->get_scl) {
+				dev_err(&adap->dev, "No {get|set}_scl() found, not using recovery\n");
+				adap->bus_recovery_info = NULL;
+			}
 		}
 	}
 
@@ -2309,16 +2311,16 @@
 #endif
 
 		if (in_atomic() || irqs_disabled()) {
-			ret = i2c_trylock_adapter(adap);
+			ret = adap->trylock_bus(adap, I2C_LOCK_SEGMENT);
 			if (!ret)
 				/* I2C activity is ongoing. */
 				return -EAGAIN;
 		} else {
-			i2c_lock_adapter(adap);
+			i2c_lock_bus(adap, I2C_LOCK_SEGMENT);
 		}
 
 		ret = __i2c_transfer(adap, msgs, num);
-		i2c_unlock_adapter(adap);
+		i2c_unlock_bus(adap, I2C_LOCK_SEGMENT);
 
 		return ret;
 	} else {
@@ -2646,7 +2648,7 @@
 static u8 i2c_smbus_msg_pec(u8 pec, struct i2c_msg *msg)
 {
 	/* The address will be sent first */
-	u8 addr = (msg->addr << 1) | !!(msg->flags & I2C_M_RD);
+	u8 addr = i2c_8bit_addr_from_msg(msg);
 	pec = i2c_smbus_pec(pec, &addr, 1);
 
 	/* The data buffer follows */
@@ -3093,7 +3095,7 @@
 	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
 
 	if (adapter->algo->smbus_xfer) {
-		i2c_lock_adapter(adapter);
+		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
 
 		/* Retry automatically on arbitration loss */
 		orig_jiffies = jiffies;
@@ -3107,7 +3109,7 @@
 				       orig_jiffies + adapter->timeout))
 				break;
 		}
-		i2c_unlock_adapter(adapter);
+		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
 
 		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
 			goto trace;
diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index d402287..8eee986 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -31,30 +31,66 @@
 struct i2c_mux_priv {
 	struct i2c_adapter adap;
 	struct i2c_algorithm algo;
-
-	struct i2c_adapter *parent;
-	struct device *mux_dev;
-	void *mux_priv;
+	struct i2c_mux_core *muxc;
 	u32 chan_id;
-
-	int (*select)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
-	int (*deselect)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
 };
 
+static int __i2c_mux_master_xfer(struct i2c_adapter *adap,
+				 struct i2c_msg msgs[], int num)
+{
+	struct i2c_mux_priv *priv = adap->algo_data;
+	struct i2c_mux_core *muxc = priv->muxc;
+	struct i2c_adapter *parent = muxc->parent;
+	int ret;
+
+	/* Switch to the right mux port and perform the transfer. */
+
+	ret = muxc->select(muxc, priv->chan_id);
+	if (ret >= 0)
+		ret = __i2c_transfer(parent, msgs, num);
+	if (muxc->deselect)
+		muxc->deselect(muxc, priv->chan_id);
+
+	return ret;
+}
+
 static int i2c_mux_master_xfer(struct i2c_adapter *adap,
 			       struct i2c_msg msgs[], int num)
 {
 	struct i2c_mux_priv *priv = adap->algo_data;
-	struct i2c_adapter *parent = priv->parent;
+	struct i2c_mux_core *muxc = priv->muxc;
+	struct i2c_adapter *parent = muxc->parent;
 	int ret;
 
 	/* Switch to the right mux port and perform the transfer. */
 
-	ret = priv->select(parent, priv->mux_priv, priv->chan_id);
+	ret = muxc->select(muxc, priv->chan_id);
 	if (ret >= 0)
-		ret = __i2c_transfer(parent, msgs, num);
-	if (priv->deselect)
-		priv->deselect(parent, priv->mux_priv, priv->chan_id);
+		ret = i2c_transfer(parent, msgs, num);
+	if (muxc->deselect)
+		muxc->deselect(muxc, priv->chan_id);
+
+	return ret;
+}
+
+static int __i2c_mux_smbus_xfer(struct i2c_adapter *adap,
+				u16 addr, unsigned short flags,
+				char read_write, u8 command,
+				int size, union i2c_smbus_data *data)
+{
+	struct i2c_mux_priv *priv = adap->algo_data;
+	struct i2c_mux_core *muxc = priv->muxc;
+	struct i2c_adapter *parent = muxc->parent;
+	int ret;
+
+	/* Select the right mux port and perform the transfer. */
+
+	ret = muxc->select(muxc, priv->chan_id);
+	if (ret >= 0)
+		ret = parent->algo->smbus_xfer(parent, addr, flags,
+					read_write, command, size, data);
+	if (muxc->deselect)
+		muxc->deselect(muxc, priv->chan_id);
 
 	return ret;
 }
@@ -65,17 +101,18 @@
 			      int size, union i2c_smbus_data *data)
 {
 	struct i2c_mux_priv *priv = adap->algo_data;
-	struct i2c_adapter *parent = priv->parent;
+	struct i2c_mux_core *muxc = priv->muxc;
+	struct i2c_adapter *parent = muxc->parent;
 	int ret;
 
 	/* Select the right mux port and perform the transfer. */
 
-	ret = priv->select(parent, priv->mux_priv, priv->chan_id);
+	ret = muxc->select(muxc, priv->chan_id);
 	if (ret >= 0)
-		ret = parent->algo->smbus_xfer(parent, addr, flags,
-					read_write, command, size, data);
-	if (priv->deselect)
-		priv->deselect(parent, priv->mux_priv, priv->chan_id);
+		ret = i2c_smbus_xfer(parent, addr, flags,
+				     read_write, command, size, data);
+	if (muxc->deselect)
+		muxc->deselect(muxc, priv->chan_id);
 
 	return ret;
 }
@@ -84,7 +121,7 @@
 static u32 i2c_mux_functionality(struct i2c_adapter *adap)
 {
 	struct i2c_mux_priv *priv = adap->algo_data;
-	struct i2c_adapter *parent = priv->parent;
+	struct i2c_adapter *parent = priv->muxc->parent;
 
 	return parent->algo->functionality(parent);
 }
@@ -102,38 +139,167 @@
 	return class;
 }
 
-struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent,
-				struct device *mux_dev,
-				void *mux_priv, u32 force_nr, u32 chan_id,
-				unsigned int class,
-				int (*select) (struct i2c_adapter *,
-					       void *, u32),
-				int (*deselect) (struct i2c_adapter *,
-						 void *, u32))
+static void i2c_mux_lock_bus(struct i2c_adapter *adapter, unsigned int flags)
 {
+	struct i2c_mux_priv *priv = adapter->algo_data;
+	struct i2c_adapter *parent = priv->muxc->parent;
+
+	rt_mutex_lock(&parent->mux_lock);
+	if (!(flags & I2C_LOCK_ROOT_ADAPTER))
+		return;
+	i2c_lock_bus(parent, flags);
+}
+
+static int i2c_mux_trylock_bus(struct i2c_adapter *adapter, unsigned int flags)
+{
+	struct i2c_mux_priv *priv = adapter->algo_data;
+	struct i2c_adapter *parent = priv->muxc->parent;
+
+	if (!rt_mutex_trylock(&parent->mux_lock))
+		return 0;	/* mux_lock not locked, failure */
+	if (!(flags & I2C_LOCK_ROOT_ADAPTER))
+		return 1;	/* we only want mux_lock, success */
+	if (parent->trylock_bus(parent, flags))
+		return 1;	/* parent locked too, success */
+	rt_mutex_unlock(&parent->mux_lock);
+	return 0;		/* parent not locked, failure */
+}
+
+static void i2c_mux_unlock_bus(struct i2c_adapter *adapter, unsigned int flags)
+{
+	struct i2c_mux_priv *priv = adapter->algo_data;
+	struct i2c_adapter *parent = priv->muxc->parent;
+
+	if (flags & I2C_LOCK_ROOT_ADAPTER)
+		i2c_unlock_bus(parent, flags);
+	rt_mutex_unlock(&parent->mux_lock);
+}
+
+static void i2c_parent_lock_bus(struct i2c_adapter *adapter,
+				unsigned int flags)
+{
+	struct i2c_mux_priv *priv = adapter->algo_data;
+	struct i2c_adapter *parent = priv->muxc->parent;
+
+	rt_mutex_lock(&parent->mux_lock);
+	i2c_lock_bus(parent, flags);
+}
+
+static int i2c_parent_trylock_bus(struct i2c_adapter *adapter,
+				  unsigned int flags)
+{
+	struct i2c_mux_priv *priv = adapter->algo_data;
+	struct i2c_adapter *parent = priv->muxc->parent;
+
+	if (!rt_mutex_trylock(&parent->mux_lock))
+		return 0;	/* mux_lock not locked, failure */
+	if (parent->trylock_bus(parent, flags))
+		return 1;	/* parent locked too, success */
+	rt_mutex_unlock(&parent->mux_lock);
+	return 0;		/* parent not locked, failure */
+}
+
+static void i2c_parent_unlock_bus(struct i2c_adapter *adapter,
+				  unsigned int flags)
+{
+	struct i2c_mux_priv *priv = adapter->algo_data;
+	struct i2c_adapter *parent = priv->muxc->parent;
+
+	i2c_unlock_bus(parent, flags);
+	rt_mutex_unlock(&parent->mux_lock);
+}
+
+struct i2c_adapter *i2c_root_adapter(struct device *dev)
+{
+	struct device *i2c;
+	struct i2c_adapter *i2c_root;
+
+	/*
+	 * Walk up the device tree to find an i2c adapter, indicating
+	 * that this is an i2c client device. Check all ancestors to
+	 * handle mfd devices etc.
+	 */
+	for (i2c = dev; i2c; i2c = i2c->parent) {
+		if (i2c->type == &i2c_adapter_type)
+			break;
+	}
+	if (!i2c)
+		return NULL;
+
+	/* Continue up the tree to find the root i2c adapter */
+	i2c_root = to_i2c_adapter(i2c);
+	while (i2c_parent_is_i2c_adapter(i2c_root))
+		i2c_root = i2c_parent_is_i2c_adapter(i2c_root);
+
+	return i2c_root;
+}
+EXPORT_SYMBOL_GPL(i2c_root_adapter);
+
+struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent,
+				   struct device *dev, int max_adapters,
+				   int sizeof_priv, u32 flags,
+				   int (*select)(struct i2c_mux_core *, u32),
+				   int (*deselect)(struct i2c_mux_core *, u32))
+{
+	struct i2c_mux_core *muxc;
+
+	muxc = devm_kzalloc(dev, sizeof(*muxc)
+			    + max_adapters * sizeof(muxc->adapter[0])
+			    + sizeof_priv, GFP_KERNEL);
+	if (!muxc)
+		return NULL;
+	if (sizeof_priv)
+		muxc->priv = &muxc->adapter[max_adapters];
+
+	muxc->parent = parent;
+	muxc->dev = dev;
+	if (flags & I2C_MUX_LOCKED)
+		muxc->mux_locked = true;
+	muxc->select = select;
+	muxc->deselect = deselect;
+	muxc->max_adapters = max_adapters;
+
+	return muxc;
+}
+EXPORT_SYMBOL_GPL(i2c_mux_alloc);
+
+int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
+			u32 force_nr, u32 chan_id,
+			unsigned int class)
+{
+	struct i2c_adapter *parent = muxc->parent;
 	struct i2c_mux_priv *priv;
 	char symlink_name[20];
 	int ret;
 
-	priv = kzalloc(sizeof(struct i2c_mux_priv), GFP_KERNEL);
+	if (muxc->num_adapters >= muxc->max_adapters) {
+		dev_err(muxc->dev, "No room for more i2c-mux adapters\n");
+		return -EINVAL;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
-		return NULL;
+		return -ENOMEM;
 
 	/* Set up private adapter data */
-	priv->parent = parent;
-	priv->mux_dev = mux_dev;
-	priv->mux_priv = mux_priv;
+	priv->muxc = muxc;
 	priv->chan_id = chan_id;
-	priv->select = select;
-	priv->deselect = deselect;
 
 	/* Need to do algo dynamically because we don't know ahead
 	 * of time what sort of physical adapter we'll be dealing with.
 	 */
-	if (parent->algo->master_xfer)
-		priv->algo.master_xfer = i2c_mux_master_xfer;
-	if (parent->algo->smbus_xfer)
-		priv->algo.smbus_xfer = i2c_mux_smbus_xfer;
+	if (parent->algo->master_xfer) {
+		if (muxc->mux_locked)
+			priv->algo.master_xfer = i2c_mux_master_xfer;
+		else
+			priv->algo.master_xfer = __i2c_mux_master_xfer;
+	}
+	if (parent->algo->smbus_xfer) {
+		if (muxc->mux_locked)
+			priv->algo.smbus_xfer = i2c_mux_smbus_xfer;
+		else
+			priv->algo.smbus_xfer = __i2c_mux_smbus_xfer;
+	}
 	priv->algo.functionality = i2c_mux_functionality;
 
 	/* Now fill out new adapter structure */
@@ -146,6 +312,15 @@
 	priv->adap.retries = parent->retries;
 	priv->adap.timeout = parent->timeout;
 	priv->adap.quirks = parent->quirks;
+	if (muxc->mux_locked) {
+		priv->adap.lock_bus = i2c_mux_lock_bus;
+		priv->adap.trylock_bus = i2c_mux_trylock_bus;
+		priv->adap.unlock_bus = i2c_mux_unlock_bus;
+	} else {
+		priv->adap.lock_bus = i2c_parent_lock_bus;
+		priv->adap.trylock_bus = i2c_parent_trylock_bus;
+		priv->adap.unlock_bus = i2c_parent_unlock_bus;
+	}
 
 	/* Sanity check on class */
 	if (i2c_mux_parent_classes(parent) & class)
@@ -159,11 +334,11 @@
 	 * Try to populate the mux adapter's of_node, expands to
 	 * nothing if !CONFIG_OF.
 	 */
-	if (mux_dev->of_node) {
+	if (muxc->dev->of_node) {
 		struct device_node *child;
 		u32 reg;
 
-		for_each_child_of_node(mux_dev->of_node, child) {
+		for_each_child_of_node(muxc->dev->of_node, child) {
 			ret = of_property_read_u32(child, "reg", &reg);
 			if (ret)
 				continue;
@@ -177,8 +352,9 @@
 	/*
 	 * Associate the mux channel with an ACPI node.
 	 */
-	if (has_acpi_companion(mux_dev))
-		acpi_preset_companion(&priv->adap.dev, ACPI_COMPANION(mux_dev),
+	if (has_acpi_companion(muxc->dev))
+		acpi_preset_companion(&priv->adap.dev,
+				      ACPI_COMPANION(muxc->dev),
 				      chan_id);
 
 	if (force_nr) {
@@ -192,35 +368,45 @@
 			"failed to add mux-adapter (error=%d)\n",
 			ret);
 		kfree(priv);
-		return NULL;
+		return ret;
 	}
 
-	WARN(sysfs_create_link(&priv->adap.dev.kobj, &mux_dev->kobj, "mux_device"),
-			       "can't create symlink to mux device\n");
+	WARN(sysfs_create_link(&priv->adap.dev.kobj, &muxc->dev->kobj,
+			       "mux_device"),
+	     "can't create symlink to mux device\n");
 
 	snprintf(symlink_name, sizeof(symlink_name), "channel-%u", chan_id);
-	WARN(sysfs_create_link(&mux_dev->kobj, &priv->adap.dev.kobj, symlink_name),
-			       "can't create symlink for channel %u\n", chan_id);
+	WARN(sysfs_create_link(&muxc->dev->kobj, &priv->adap.dev.kobj,
+			       symlink_name),
+	     "can't create symlink for channel %u\n", chan_id);
 	dev_info(&parent->dev, "Added multiplexed i2c bus %d\n",
 		 i2c_adapter_id(&priv->adap));
 
-	return &priv->adap;
+	muxc->adapter[muxc->num_adapters++] = &priv->adap;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(i2c_add_mux_adapter);
+EXPORT_SYMBOL_GPL(i2c_mux_add_adapter);
 
-void i2c_del_mux_adapter(struct i2c_adapter *adap)
+void i2c_mux_del_adapters(struct i2c_mux_core *muxc)
 {
-	struct i2c_mux_priv *priv = adap->algo_data;
 	char symlink_name[20];
 
-	snprintf(symlink_name, sizeof(symlink_name), "channel-%u", priv->chan_id);
-	sysfs_remove_link(&priv->mux_dev->kobj, symlink_name);
+	while (muxc->num_adapters) {
+		struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters];
+		struct i2c_mux_priv *priv = adap->algo_data;
 
-	sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
-	i2c_del_adapter(adap);
-	kfree(priv);
+		muxc->adapter[muxc->num_adapters] = NULL;
+
+		snprintf(symlink_name, sizeof(symlink_name),
+			 "channel-%u", priv->chan_id);
+		sysfs_remove_link(&muxc->dev->kobj, symlink_name);
+
+		sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
+		i2c_del_adapter(adap);
+		kfree(priv);
+	}
 }
-EXPORT_SYMBOL_GPL(i2c_del_mux_adapter);
+EXPORT_SYMBOL_GPL(i2c_mux_del_adapters);
 
 MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
 MODULE_DESCRIPTION("I2C driver for multiplexed I2C busses");
diff --git a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c
index 402e3a6..a90bbc4 100644
--- a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c
+++ b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c
@@ -28,8 +28,6 @@
 /**
  * struct i2c_arbitrator_data - Driver data for I2C arbitrator
  *
- * @parent: Parent adapter
- * @child: Child bus
  * @our_gpio: GPIO we'll use to claim.
  * @our_gpio_release: 0 if active high; 1 if active low; AKA if the GPIO ==
  *   this then consider it released.
@@ -42,8 +40,6 @@
  */
 
 struct i2c_arbitrator_data {
-	struct i2c_adapter *parent;
-	struct i2c_adapter *child;
 	int our_gpio;
 	int our_gpio_release;
 	int their_gpio;
@@ -59,9 +55,9 @@
  *
  * Use the GPIO-based signalling protocol; return -EBUSY if we fail.
  */
-static int i2c_arbitrator_select(struct i2c_adapter *adap, void *data, u32 chan)
+static int i2c_arbitrator_select(struct i2c_mux_core *muxc, u32 chan)
 {
-	const struct i2c_arbitrator_data *arb = data;
+	const struct i2c_arbitrator_data *arb = i2c_mux_priv(muxc);
 	unsigned long stop_retry, stop_time;
 
 	/* Start a round of trying to claim the bus */
@@ -93,7 +89,7 @@
 	/* Give up, release our claim */
 	gpio_set_value(arb->our_gpio, arb->our_gpio_release);
 	udelay(arb->slew_delay_us);
-	dev_err(&adap->dev, "Could not claim bus, timeout\n");
+	dev_err(muxc->dev, "Could not claim bus, timeout\n");
 	return -EBUSY;
 }
 
@@ -102,10 +98,9 @@
  *
  * Release the I2C bus using the GPIO-based signalling protocol.
  */
-static int i2c_arbitrator_deselect(struct i2c_adapter *adap, void *data,
-				   u32 chan)
+static int i2c_arbitrator_deselect(struct i2c_mux_core *muxc, u32 chan)
 {
-	const struct i2c_arbitrator_data *arb = data;
+	const struct i2c_arbitrator_data *arb = i2c_mux_priv(muxc);
 
 	/* Release the bus and wait for the other master to notice */
 	gpio_set_value(arb->our_gpio, arb->our_gpio_release);
@@ -119,6 +114,7 @@
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
 	struct device_node *parent_np;
+	struct i2c_mux_core *muxc;
 	struct i2c_arbitrator_data *arb;
 	enum of_gpio_flags gpio_flags;
 	unsigned long out_init;
@@ -134,12 +130,13 @@
 		return -EINVAL;
 	}
 
-	arb = devm_kzalloc(dev, sizeof(*arb), GFP_KERNEL);
-	if (!arb) {
-		dev_err(dev, "Cannot allocate i2c_arbitrator_data\n");
+	muxc = i2c_mux_alloc(NULL, dev, 1, sizeof(*arb), 0,
+			     i2c_arbitrator_select, i2c_arbitrator_deselect);
+	if (!muxc)
 		return -ENOMEM;
-	}
-	platform_set_drvdata(pdev, arb);
+	arb = i2c_mux_priv(muxc);
+
+	platform_set_drvdata(pdev, muxc);
 
 	/* Request GPIOs */
 	ret = of_get_named_gpio_flags(np, "our-claim-gpio", 0, &gpio_flags);
@@ -196,21 +193,18 @@
 		dev_err(dev, "Cannot parse i2c-parent\n");
 		return -EINVAL;
 	}
-	arb->parent = of_get_i2c_adapter_by_node(parent_np);
+	muxc->parent = of_get_i2c_adapter_by_node(parent_np);
 	of_node_put(parent_np);
-	if (!arb->parent) {
+	if (!muxc->parent) {
 		dev_err(dev, "Cannot find parent bus\n");
 		return -EPROBE_DEFER;
 	}
 
 	/* Actually add the mux adapter */
-	arb->child = i2c_add_mux_adapter(arb->parent, dev, arb, 0, 0, 0,
-					 i2c_arbitrator_select,
-					 i2c_arbitrator_deselect);
-	if (!arb->child) {
+	ret = i2c_mux_add_adapter(muxc, 0, 0, 0);
+	if (ret) {
 		dev_err(dev, "Failed to add adapter\n");
-		ret = -ENODEV;
-		i2c_put_adapter(arb->parent);
+		i2c_put_adapter(muxc->parent);
 	}
 
 	return ret;
@@ -218,11 +212,10 @@
 
 static int i2c_arbitrator_remove(struct platform_device *pdev)
 {
-	struct i2c_arbitrator_data *arb = platform_get_drvdata(pdev);
+	struct i2c_mux_core *muxc = platform_get_drvdata(pdev);
 
-	i2c_del_mux_adapter(arb->child);
-	i2c_put_adapter(arb->parent);
-
+	i2c_mux_del_adapters(muxc);
+	i2c_put_adapter(muxc->parent);
 	return 0;
 }
 
diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c
index b8e11c1..e5cf26e 100644
--- a/drivers/i2c/muxes/i2c-mux-gpio.c
+++ b/drivers/i2c/muxes/i2c-mux-gpio.c
@@ -15,11 +15,10 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/gpio.h>
+#include "../../gpio/gpiolib.h"
 #include <linux/of_gpio.h>
 
 struct gpiomux {
-	struct i2c_adapter *parent;
-	struct i2c_adapter **adap; /* child busses */
 	struct i2c_mux_gpio_platform_data data;
 	unsigned gpio_base;
 };
@@ -33,18 +32,18 @@
 					val & (1 << i));
 }
 
-static int i2c_mux_gpio_select(struct i2c_adapter *adap, void *data, u32 chan)
+static int i2c_mux_gpio_select(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct gpiomux *mux = data;
+	struct gpiomux *mux = i2c_mux_priv(muxc);
 
 	i2c_mux_gpio_set(mux, chan);
 
 	return 0;
 }
 
-static int i2c_mux_gpio_deselect(struct i2c_adapter *adap, void *data, u32 chan)
+static int i2c_mux_gpio_deselect(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct gpiomux *mux = data;
+	struct gpiomux *mux = i2c_mux_priv(muxc);
 
 	i2c_mux_gpio_set(mux, mux->data.idle);
 
@@ -136,19 +135,16 @@
 
 static int i2c_mux_gpio_probe(struct platform_device *pdev)
 {
+	struct i2c_mux_core *muxc;
 	struct gpiomux *mux;
 	struct i2c_adapter *parent;
-	int (*deselect) (struct i2c_adapter *, void *, u32);
+	struct i2c_adapter *root;
 	unsigned initial_state, gpio_base;
 	int i, ret;
 
 	mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
-	if (!mux) {
-		dev_err(&pdev->dev, "Cannot allocate gpiomux structure");
+	if (!mux)
 		return -ENOMEM;
-	}
-
-	platform_set_drvdata(pdev, mux);
 
 	if (!dev_get_platdata(&pdev->dev)) {
 		ret = i2c_mux_gpio_probe_dt(mux, pdev);
@@ -180,27 +176,32 @@
 	if (!parent)
 		return -EPROBE_DEFER;
 
-	mux->parent = parent;
-	mux->gpio_base = gpio_base;
-
-	mux->adap = devm_kzalloc(&pdev->dev,
-				 sizeof(*mux->adap) * mux->data.n_values,
-				 GFP_KERNEL);
-	if (!mux->adap) {
-		dev_err(&pdev->dev, "Cannot allocate i2c_adapter structure");
+	muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values, 0, 0,
+			     i2c_mux_gpio_select, NULL);
+	if (!muxc) {
 		ret = -ENOMEM;
 		goto alloc_failed;
 	}
+	muxc->priv = mux;
+
+	platform_set_drvdata(pdev, muxc);
+
+	root = i2c_root_adapter(&parent->dev);
+
+	muxc->mux_locked = true;
+	mux->gpio_base = gpio_base;
 
 	if (mux->data.idle != I2C_MUX_GPIO_NO_IDLE) {
 		initial_state = mux->data.idle;
-		deselect = i2c_mux_gpio_deselect;
+		muxc->deselect = i2c_mux_gpio_deselect;
 	} else {
 		initial_state = mux->data.values[0];
-		deselect = NULL;
 	}
 
 	for (i = 0; i < mux->data.n_gpios; i++) {
+		struct device *gpio_dev;
+		struct gpio_desc *gpio_desc;
+
 		ret = gpio_request(gpio_base + mux->data.gpios[i], "i2c-mux-gpio");
 		if (ret) {
 			dev_err(&pdev->dev, "Failed to request GPIO %d\n",
@@ -217,17 +218,24 @@
 			i++;	/* gpio_request above succeeded, so must free */
 			goto err_request_gpio;
 		}
+
+		if (!muxc->mux_locked)
+			continue;
+
+		gpio_desc = gpio_to_desc(gpio_base + mux->data.gpios[i]);
+		gpio_dev = &gpio_desc->gdev->dev;
+		muxc->mux_locked = i2c_root_adapter(gpio_dev) == root;
 	}
 
+	if (muxc->mux_locked)
+		dev_info(&pdev->dev, "mux-locked i2c mux\n");
+
 	for (i = 0; i < mux->data.n_values; i++) {
 		u32 nr = mux->data.base_nr ? (mux->data.base_nr + i) : 0;
 		unsigned int class = mux->data.classes ? mux->data.classes[i] : 0;
 
-		mux->adap[i] = i2c_add_mux_adapter(parent, &pdev->dev, mux, nr,
-						   mux->data.values[i], class,
-						   i2c_mux_gpio_select, deselect);
-		if (!mux->adap[i]) {
-			ret = -ENODEV;
+		ret = i2c_mux_add_adapter(muxc, nr, mux->data.values[i], class);
+		if (ret) {
 			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
 			goto add_adapter_failed;
 		}
@@ -239,8 +247,7 @@
 	return 0;
 
 add_adapter_failed:
-	for (; i > 0; i--)
-		i2c_del_mux_adapter(mux->adap[i - 1]);
+	i2c_mux_del_adapters(muxc);
 	i = mux->data.n_gpios;
 err_request_gpio:
 	for (; i > 0; i--)
@@ -253,16 +260,16 @@
 
 static int i2c_mux_gpio_remove(struct platform_device *pdev)
 {
-	struct gpiomux *mux = platform_get_drvdata(pdev);
+	struct i2c_mux_core *muxc = platform_get_drvdata(pdev);
+	struct gpiomux *mux = i2c_mux_priv(muxc);
 	int i;
 
-	for (i = 0; i < mux->data.n_values; i++)
-		i2c_del_mux_adapter(mux->adap[i]);
+	i2c_mux_del_adapters(muxc);
 
 	for (i = 0; i < mux->data.n_gpios; i++)
 		gpio_free(mux->gpio_base + mux->data.gpios[i]);
 
-	i2c_put_adapter(mux->parent);
+	i2c_put_adapter(muxc->parent);
 
 	return 0;
 }
diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c
index d0ba424..3cb8af6 100644
--- a/drivers/i2c/muxes/i2c-mux-pca9541.c
+++ b/drivers/i2c/muxes/i2c-mux-pca9541.c
@@ -73,7 +73,7 @@
 #define SELECT_DELAY_LONG	1000
 
 struct pca9541 {
-	struct i2c_adapter *mux_adap;
+	struct i2c_client *client;
 	unsigned long select_timeout;
 	unsigned long arb_timeout;
 };
@@ -217,7 +217,8 @@
  */
 static int pca9541_arbitrate(struct i2c_client *client)
 {
-	struct pca9541 *data = i2c_get_clientdata(client);
+	struct i2c_mux_core *muxc = i2c_get_clientdata(client);
+	struct pca9541 *data = i2c_mux_priv(muxc);
 	int reg;
 
 	reg = pca9541_reg_read(client, PCA9541_CONTROL);
@@ -285,9 +286,10 @@
 	return 0;
 }
 
-static int pca9541_select_chan(struct i2c_adapter *adap, void *client, u32 chan)
+static int pca9541_select_chan(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct pca9541 *data = i2c_get_clientdata(client);
+	struct pca9541 *data = i2c_mux_priv(muxc);
+	struct i2c_client *client = data->client;
 	int ret;
 	unsigned long timeout = jiffies + ARB2_TIMEOUT;
 		/* give up after this time */
@@ -309,9 +311,11 @@
 	return -ETIMEDOUT;
 }
 
-static int pca9541_release_chan(struct i2c_adapter *adap,
-				void *client, u32 chan)
+static int pca9541_release_chan(struct i2c_mux_core *muxc, u32 chan)
 {
+	struct pca9541 *data = i2c_mux_priv(muxc);
+	struct i2c_client *client = data->client;
+
 	pca9541_release_bus(client);
 	return 0;
 }
@@ -324,20 +328,13 @@
 {
 	struct i2c_adapter *adap = client->adapter;
 	struct pca954x_platform_data *pdata = dev_get_platdata(&client->dev);
+	struct i2c_mux_core *muxc;
 	struct pca9541 *data;
 	int force;
-	int ret = -ENODEV;
+	int ret;
 
 	if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_BYTE_DATA))
-		goto err;
-
-	data = kzalloc(sizeof(struct pca9541), GFP_KERNEL);
-	if (!data) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	i2c_set_clientdata(client, data);
+		return -ENODEV;
 
 	/*
 	 * I2C accesses are unprotected here.
@@ -352,34 +349,33 @@
 	force = 0;
 	if (pdata)
 		force = pdata->modes[0].adap_id;
-	data->mux_adap = i2c_add_mux_adapter(adap, &client->dev, client,
-					     force, 0, 0,
-					     pca9541_select_chan,
-					     pca9541_release_chan);
+	muxc = i2c_mux_alloc(adap, &client->dev, 1, sizeof(*data), 0,
+			     pca9541_select_chan, pca9541_release_chan);
+	if (!muxc)
+		return -ENOMEM;
 
-	if (data->mux_adap == NULL) {
+	data = i2c_mux_priv(muxc);
+	data->client = client;
+
+	i2c_set_clientdata(client, muxc);
+
+	ret = i2c_mux_add_adapter(muxc, force, 0, 0);
+	if (ret) {
 		dev_err(&client->dev, "failed to register master selector\n");
-		goto exit_free;
+		return ret;
 	}
 
 	dev_info(&client->dev, "registered master selector for I2C %s\n",
 		 client->name);
 
 	return 0;
-
-exit_free:
-	kfree(data);
-err:
-	return ret;
 }
 
 static int pca9541_remove(struct i2c_client *client)
 {
-	struct pca9541 *data = i2c_get_clientdata(client);
+	struct i2c_mux_core *muxc = i2c_get_clientdata(client);
 
-	i2c_del_mux_adapter(data->mux_adap);
-
-	kfree(data);
+	i2c_mux_del_adapters(muxc);
 	return 0;
 }
 
diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c
index acfcef3..528e755 100644
--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
+++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
@@ -60,9 +60,10 @@
 
 struct pca954x {
 	enum pca_type type;
-	struct i2c_adapter *virt_adaps[PCA954X_MAX_NCHANS];
 
 	u8 last_chan;		/* last register value */
+	u8 deselect;
+	struct i2c_client *client;
 };
 
 struct chip_desc {
@@ -146,10 +147,10 @@
 	return ret;
 }
 
-static int pca954x_select_chan(struct i2c_adapter *adap,
-			       void *client, u32 chan)
+static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct pca954x *data = i2c_get_clientdata(client);
+	struct pca954x *data = i2c_mux_priv(muxc);
+	struct i2c_client *client = data->client;
 	const struct chip_desc *chip = &chips[data->type];
 	u8 regval;
 	int ret = 0;
@@ -162,21 +163,24 @@
 
 	/* Only select the channel if its different from the last channel */
 	if (data->last_chan != regval) {
-		ret = pca954x_reg_write(adap, client, regval);
+		ret = pca954x_reg_write(muxc->parent, client, regval);
 		data->last_chan = regval;
 	}
 
 	return ret;
 }
 
-static int pca954x_deselect_mux(struct i2c_adapter *adap,
-				void *client, u32 chan)
+static int pca954x_deselect_mux(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct pca954x *data = i2c_get_clientdata(client);
+	struct pca954x *data = i2c_mux_priv(muxc);
+	struct i2c_client *client = data->client;
+
+	if (!(data->deselect & (1 << chan)))
+		return 0;
 
 	/* Deselect active channel */
 	data->last_chan = 0;
-	return pca954x_reg_write(adap, client, data->last_chan);
+	return pca954x_reg_write(muxc->parent, client, data->last_chan);
 }
 
 /*
@@ -191,17 +195,22 @@
 	bool idle_disconnect_dt;
 	struct gpio_desc *gpio;
 	int num, force, class;
+	struct i2c_mux_core *muxc;
 	struct pca954x *data;
 	int ret;
 
 	if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_BYTE))
 		return -ENODEV;
 
-	data = devm_kzalloc(&client->dev, sizeof(struct pca954x), GFP_KERNEL);
-	if (!data)
+	muxc = i2c_mux_alloc(adap, &client->dev,
+			     PCA954X_MAX_NCHANS, sizeof(*data), 0,
+			     pca954x_select_chan, pca954x_deselect_mux);
+	if (!muxc)
 		return -ENOMEM;
+	data = i2c_mux_priv(muxc);
 
-	i2c_set_clientdata(client, data);
+	i2c_set_clientdata(client, muxc);
+	data->client = client;
 
 	/* Get the mux out of reset if a reset GPIO is specified. */
 	gpio = devm_gpiod_get_optional(&client->dev, "reset", GPIOD_OUT_LOW);
@@ -238,16 +247,13 @@
 				/* discard unconfigured channels */
 				break;
 			idle_disconnect_pd = pdata->modes[num].deselect_on_exit;
+			data->deselect |= (idle_disconnect_pd
+					   || idle_disconnect_dt) << num;
 		}
 
-		data->virt_adaps[num] =
-			i2c_add_mux_adapter(adap, &client->dev, client,
-				force, num, class, pca954x_select_chan,
-				(idle_disconnect_pd || idle_disconnect_dt)
-					? pca954x_deselect_mux : NULL);
+		ret = i2c_mux_add_adapter(muxc, force, num, class);
 
-		if (data->virt_adaps[num] == NULL) {
-			ret = -ENODEV;
+		if (ret) {
 			dev_err(&client->dev,
 				"failed to register multiplexed adapter"
 				" %d as bus %d\n", num, force);
@@ -263,23 +269,15 @@
 	return 0;
 
 virt_reg_failed:
-	for (num--; num >= 0; num--)
-		i2c_del_mux_adapter(data->virt_adaps[num]);
+	i2c_mux_del_adapters(muxc);
 	return ret;
 }
 
 static int pca954x_remove(struct i2c_client *client)
 {
-	struct pca954x *data = i2c_get_clientdata(client);
-	const struct chip_desc *chip = &chips[data->type];
-	int i;
+	struct i2c_mux_core *muxc = i2c_get_clientdata(client);
 
-	for (i = 0; i < chip->nchans; ++i)
-		if (data->virt_adaps[i]) {
-			i2c_del_mux_adapter(data->virt_adaps[i]);
-			data->virt_adaps[i] = NULL;
-		}
-
+	i2c_mux_del_adapters(muxc);
 	return 0;
 }
 
@@ -287,7 +285,8 @@
 static int pca954x_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	struct pca954x *data = i2c_get_clientdata(client);
+	struct i2c_mux_core *muxc = i2c_get_clientdata(client);
+	struct pca954x *data = i2c_mux_priv(muxc);
 
 	data->last_chan = 0;
 	return i2c_smbus_write_byte(client, 0);
diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c
index b5a982b..35bb775 100644
--- a/drivers/i2c/muxes/i2c-mux-pinctrl.c
+++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c
@@ -24,36 +24,32 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include "../../pinctrl/core.h"
 
 struct i2c_mux_pinctrl {
-	struct device *dev;
 	struct i2c_mux_pinctrl_platform_data *pdata;
 	struct pinctrl *pinctrl;
 	struct pinctrl_state **states;
 	struct pinctrl_state *state_idle;
-	struct i2c_adapter *parent;
-	struct i2c_adapter **busses;
 };
 
-static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data,
-				  u32 chan)
+static int i2c_mux_pinctrl_select(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct i2c_mux_pinctrl *mux = data;
+	struct i2c_mux_pinctrl *mux = i2c_mux_priv(muxc);
 
 	return pinctrl_select_state(mux->pinctrl, mux->states[chan]);
 }
 
-static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data,
-				    u32 chan)
+static int i2c_mux_pinctrl_deselect(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct i2c_mux_pinctrl *mux = data;
+	struct i2c_mux_pinctrl *mux = i2c_mux_priv(muxc);
 
 	return pinctrl_select_state(mux->pinctrl, mux->state_idle);
 }
 
 #ifdef CONFIG_OF
 static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
-				struct platform_device *pdev)
+				    struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	int num_names, i, ret;
@@ -64,15 +60,12 @@
 		return 0;
 
 	mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL);
-	if (!mux->pdata) {
-		dev_err(mux->dev,
-			"Cannot allocate i2c_mux_pinctrl_platform_data\n");
+	if (!mux->pdata)
 		return -ENOMEM;
-	}
 
 	num_names = of_property_count_strings(np, "pinctrl-names");
 	if (num_names < 0) {
-		dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
+		dev_err(&pdev->dev, "Cannot parse pinctrl-names: %d\n",
 			num_names);
 		return num_names;
 	}
@@ -80,23 +73,22 @@
 	mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev,
 		sizeof(*mux->pdata->pinctrl_states) * num_names,
 		GFP_KERNEL);
-	if (!mux->pdata->pinctrl_states) {
-		dev_err(mux->dev, "Cannot allocate pinctrl_states\n");
+	if (!mux->pdata->pinctrl_states)
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < num_names; i++) {
 		ret = of_property_read_string_index(np, "pinctrl-names", i,
 			&mux->pdata->pinctrl_states[mux->pdata->bus_count]);
 		if (ret < 0) {
-			dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
+			dev_err(&pdev->dev, "Cannot parse pinctrl-names: %d\n",
 				ret);
 			return ret;
 		}
 		if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count],
 			    "idle")) {
 			if (i != num_names - 1) {
-				dev_err(mux->dev, "idle state must be last\n");
+				dev_err(&pdev->dev,
+					"idle state must be last\n");
 				return -EINVAL;
 			}
 			mux->pdata->pinctrl_state_idle = "idle";
@@ -107,13 +99,13 @@
 
 	adapter_np = of_parse_phandle(np, "i2c-parent", 0);
 	if (!adapter_np) {
-		dev_err(mux->dev, "Cannot parse i2c-parent\n");
+		dev_err(&pdev->dev, "Cannot parse i2c-parent\n");
 		return -ENODEV;
 	}
 	adapter = of_find_i2c_adapter_by_node(adapter_np);
 	of_node_put(adapter_np);
 	if (!adapter) {
-		dev_err(mux->dev, "Cannot find parent bus\n");
+		dev_err(&pdev->dev, "Cannot find parent bus\n");
 		return -EPROBE_DEFER;
 	}
 	mux->pdata->parent_bus_num = i2c_adapter_id(adapter);
@@ -129,21 +121,38 @@
 }
 #endif
 
+static struct i2c_adapter *i2c_mux_pinctrl_root_adapter(
+	struct pinctrl_state *state)
+{
+	struct i2c_adapter *root = NULL;
+	struct pinctrl_setting *setting;
+	struct i2c_adapter *pin_root;
+
+	list_for_each_entry(setting, &state->settings, node) {
+		pin_root = i2c_root_adapter(setting->pctldev->dev);
+		if (!pin_root)
+			return NULL;
+		if (!root)
+			root = pin_root;
+		else if (root != pin_root)
+			return NULL;
+	}
+
+	return root;
+}
+
 static int i2c_mux_pinctrl_probe(struct platform_device *pdev)
 {
+	struct i2c_mux_core *muxc;
 	struct i2c_mux_pinctrl *mux;
-	int (*deselect)(struct i2c_adapter *, void *, u32);
+	struct i2c_adapter *root;
 	int i, ret;
 
 	mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
 	if (!mux) {
-		dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n");
 		ret = -ENOMEM;
 		goto err;
 	}
-	platform_set_drvdata(pdev, mux);
-
-	mux->dev = &pdev->dev;
 
 	mux->pdata = dev_get_platdata(&pdev->dev);
 	if (!mux->pdata) {
@@ -166,14 +175,15 @@
 		goto err;
 	}
 
-	mux->busses = devm_kzalloc(&pdev->dev,
-				   sizeof(*mux->busses) * mux->pdata->bus_count,
-				   GFP_KERNEL);
-	if (!mux->busses) {
-		dev_err(&pdev->dev, "Cannot allocate busses\n");
+	muxc = i2c_mux_alloc(NULL, &pdev->dev, mux->pdata->bus_count, 0, 0,
+			     i2c_mux_pinctrl_select, NULL);
+	if (!muxc) {
 		ret = -ENOMEM;
 		goto err;
 	}
+	muxc->priv = mux;
+
+	platform_set_drvdata(pdev, muxc);
 
 	mux->pinctrl = devm_pinctrl_get(&pdev->dev);
 	if (IS_ERR(mux->pinctrl)) {
@@ -184,13 +194,13 @@
 	for (i = 0; i < mux->pdata->bus_count; i++) {
 		mux->states[i] = pinctrl_lookup_state(mux->pinctrl,
 						mux->pdata->pinctrl_states[i]);
-			if (IS_ERR(mux->states[i])) {
-				ret = PTR_ERR(mux->states[i]);
-				dev_err(&pdev->dev,
-					"Cannot look up pinctrl state %s: %d\n",
-					mux->pdata->pinctrl_states[i], ret);
-				goto err;
-			}
+		if (IS_ERR(mux->states[i])) {
+			ret = PTR_ERR(mux->states[i]);
+			dev_err(&pdev->dev,
+				"Cannot look up pinctrl state %s: %d\n",
+				mux->pdata->pinctrl_states[i], ret);
+			goto err;
+		}
 	}
 	if (mux->pdata->pinctrl_state_idle) {
 		mux->state_idle = pinctrl_lookup_state(mux->pinctrl,
@@ -203,29 +213,39 @@
 			goto err;
 		}
 
-		deselect = i2c_mux_pinctrl_deselect;
-	} else {
-		deselect = NULL;
+		muxc->deselect = i2c_mux_pinctrl_deselect;
 	}
 
-	mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num);
-	if (!mux->parent) {
+	muxc->parent = i2c_get_adapter(mux->pdata->parent_bus_num);
+	if (!muxc->parent) {
 		dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
 			mux->pdata->parent_bus_num);
 		ret = -EPROBE_DEFER;
 		goto err;
 	}
 
+	root = i2c_root_adapter(&muxc->parent->dev);
+
+	muxc->mux_locked = true;
+	for (i = 0; i < mux->pdata->bus_count; i++) {
+		if (root != i2c_mux_pinctrl_root_adapter(mux->states[i])) {
+			muxc->mux_locked = false;
+			break;
+		}
+	}
+	if (muxc->mux_locked && mux->pdata->pinctrl_state_idle &&
+	    root != i2c_mux_pinctrl_root_adapter(mux->state_idle))
+		muxc->mux_locked = false;
+
+	if (muxc->mux_locked)
+		dev_info(&pdev->dev, "mux-locked i2c mux\n");
+
 	for (i = 0; i < mux->pdata->bus_count; i++) {
 		u32 bus = mux->pdata->base_bus_num ?
 				(mux->pdata->base_bus_num + i) : 0;
 
-		mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev,
-						     mux, bus, i, 0,
-						     i2c_mux_pinctrl_select,
-						     deselect);
-		if (!mux->busses[i]) {
-			ret = -ENODEV;
+		ret = i2c_mux_add_adapter(muxc, bus, i, 0);
+		if (ret) {
 			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
 			goto err_del_adapter;
 		}
@@ -234,23 +254,18 @@
 	return 0;
 
 err_del_adapter:
-	for (; i > 0; i--)
-		i2c_del_mux_adapter(mux->busses[i - 1]);
-	i2c_put_adapter(mux->parent);
+	i2c_mux_del_adapters(muxc);
+	i2c_put_adapter(muxc->parent);
 err:
 	return ret;
 }
 
 static int i2c_mux_pinctrl_remove(struct platform_device *pdev)
 {
-	struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev);
-	int i;
+	struct i2c_mux_core *muxc = platform_get_drvdata(pdev);
 
-	for (i = 0; i < mux->pdata->bus_count; i++)
-		i2c_del_mux_adapter(mux->busses[i]);
-
-	i2c_put_adapter(mux->parent);
-
+	i2c_mux_del_adapters(muxc);
+	i2c_put_adapter(muxc->parent);
 	return 0;
 }
 
diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 5fbd5bd..6773cad 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -21,8 +21,6 @@
 #include <linux/slab.h>
 
 struct regmux {
-	struct i2c_adapter *parent;
-	struct i2c_adapter **adap; /* child busses */
 	struct i2c_mux_reg_platform_data data;
 };
 
@@ -64,18 +62,16 @@
 	return 0;
 }
 
-static int i2c_mux_reg_select(struct i2c_adapter *adap, void *data,
-			      unsigned int chan)
+static int i2c_mux_reg_select(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct regmux *mux = data;
+	struct regmux *mux = i2c_mux_priv(muxc);
 
 	return i2c_mux_reg_set(mux, chan);
 }
 
-static int i2c_mux_reg_deselect(struct i2c_adapter *adap, void *data,
-				unsigned int chan)
+static int i2c_mux_reg_deselect(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct regmux *mux = data;
+	struct regmux *mux = i2c_mux_priv(muxc);
 
 	if (mux->data.idle_in_use)
 		return i2c_mux_reg_set(mux, mux->data.idle);
@@ -85,7 +81,7 @@
 
 #ifdef CONFIG_OF
 static int i2c_mux_reg_probe_dt(struct regmux *mux,
-					struct platform_device *pdev)
+				struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *adapter_np, *child;
@@ -107,7 +103,6 @@
 	if (!adapter)
 		return -EPROBE_DEFER;
 
-	mux->parent = adapter;
 	mux->data.parent = i2c_adapter_id(adapter);
 	put_device(&adapter->dev);
 
@@ -161,7 +156,7 @@
 }
 #else
 static int i2c_mux_reg_probe_dt(struct regmux *mux,
-					struct platform_device *pdev)
+				struct platform_device *pdev)
 {
 	return 0;
 }
@@ -169,10 +164,10 @@
 
 static int i2c_mux_reg_probe(struct platform_device *pdev)
 {
+	struct i2c_mux_core *muxc;
 	struct regmux *mux;
 	struct i2c_adapter *parent;
 	struct resource *res;
-	int (*deselect)(struct i2c_adapter *, void *, u32);
 	unsigned int class;
 	int i, ret, nr;
 
@@ -180,17 +175,9 @@
 	if (!mux)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, mux);
-
 	if (dev_get_platdata(&pdev->dev)) {
 		memcpy(&mux->data, dev_get_platdata(&pdev->dev),
 			sizeof(mux->data));
-
-		parent = i2c_get_adapter(mux->data.parent);
-		if (!parent)
-			return -EPROBE_DEFER;
-
-		mux->parent = parent;
 	} else {
 		ret = i2c_mux_reg_probe_dt(mux, pdev);
 		if (ret < 0) {
@@ -199,6 +186,10 @@
 		}
 	}
 
+	parent = i2c_get_adapter(mux->data.parent);
+	if (!parent)
+		return -EPROBE_DEFER;
+
 	if (!mux->data.reg) {
 		dev_info(&pdev->dev,
 			"Register not set, using platform resource\n");
@@ -215,55 +206,45 @@
 		return -EINVAL;
 	}
 
-	mux->adap = devm_kzalloc(&pdev->dev,
-				 sizeof(*mux->adap) * mux->data.n_values,
-				 GFP_KERNEL);
-	if (!mux->adap) {
-		dev_err(&pdev->dev, "Cannot allocate i2c_adapter structure");
+	muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values, 0, 0,
+			     i2c_mux_reg_select, NULL);
+	if (!muxc)
 		return -ENOMEM;
-	}
+	muxc->priv = mux;
+
+	platform_set_drvdata(pdev, muxc);
 
 	if (mux->data.idle_in_use)
-		deselect = i2c_mux_reg_deselect;
-	else
-		deselect = NULL;
+		muxc->deselect = i2c_mux_reg_deselect;
 
 	for (i = 0; i < mux->data.n_values; i++) {
 		nr = mux->data.base_nr ? (mux->data.base_nr + i) : 0;
 		class = mux->data.classes ? mux->data.classes[i] : 0;
 
-		mux->adap[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev, mux,
-						   nr, mux->data.values[i],
-						   class, i2c_mux_reg_select,
-						   deselect);
-		if (!mux->adap[i]) {
-			ret = -ENODEV;
+		ret = i2c_mux_add_adapter(muxc, nr, mux->data.values[i], class);
+		if (ret) {
 			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
 			goto add_adapter_failed;
 		}
 	}
 
 	dev_dbg(&pdev->dev, "%d port mux on %s adapter\n",
-		 mux->data.n_values, mux->parent->name);
+		 mux->data.n_values, muxc->parent->name);
 
 	return 0;
 
 add_adapter_failed:
-	for (; i > 0; i--)
-		i2c_del_mux_adapter(mux->adap[i - 1]);
+	i2c_mux_del_adapters(muxc);
 
 	return ret;
 }
 
 static int i2c_mux_reg_remove(struct platform_device *pdev)
 {
-	struct regmux *mux = platform_get_drvdata(pdev);
-	int i;
+	struct i2c_mux_core *muxc = platform_get_drvdata(pdev);
 
-	for (i = 0; i < mux->data.n_values; i++)
-		i2c_del_mux_adapter(mux->adap[i]);
-
-	i2c_put_adapter(mux->parent);
+	i2c_mux_del_adapters(muxc);
+	i2c_put_adapter(muxc->parent);
 
 	return 0;
 }
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
index 2771106..f62b8bd 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
@@ -183,7 +183,7 @@
 			} else
 				return 0; /* no secondary addr, which is OK */
 		}
-		st->mux_client = i2c_new_device(st->mux_adapter, &info);
+		st->mux_client = i2c_new_device(st->muxc->adapter[0], &info);
 		if (!st->mux_client)
 			return -ENODEV;
 	}
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
index d192953..0c2bded 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -23,7 +23,6 @@
 #include <linux/kfifo.h>
 #include <linux/spinlock.h>
 #include <linux/iio/iio.h>
-#include <linux/i2c-mux.h>
 #include <linux/acpi.h>
 #include "inv_mpu_iio.h"
 
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
index 5ee4e0d..9ba1179 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
@@ -15,7 +15,6 @@
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
-#include <linux/i2c-mux.h>
 #include <linux/iio/iio.h>
 #include <linux/module.h>
 #include "inv_mpu_iio.h"
@@ -25,46 +24,16 @@
 	.val_bits = 8,
 };
 
-/*
- * The i2c read/write needs to happen in unlocked mode. As the parent
- * adapter is common. If we use locked versions, it will fail as
- * the mux adapter will lock the parent i2c adapter, while calling
- * select/deselect functions.
- */
-static int inv_mpu6050_write_reg_unlocked(struct i2c_client *client,
-					  u8 reg, u8 d)
+static int inv_mpu6050_select_bypass(struct i2c_mux_core *muxc, u32 chan_id)
 {
-	int ret;
-	u8 buf[2] = {reg, d};
-	struct i2c_msg msg[1] = {
-		{
-			.addr = client->addr,
-			.flags = 0,
-			.len = sizeof(buf),
-			.buf = buf,
-		}
-	};
-
-	ret = __i2c_transfer(client->adapter, msg, 1);
-	if (ret != 1)
-		return ret;
-
-	return 0;
-}
-
-static int inv_mpu6050_select_bypass(struct i2c_adapter *adap, void *mux_priv,
-				     u32 chan_id)
-{
-	struct i2c_client *client = mux_priv;
-	struct iio_dev *indio_dev = dev_get_drvdata(&client->dev);
+	struct iio_dev *indio_dev = i2c_mux_priv(muxc);
 	struct inv_mpu6050_state *st = iio_priv(indio_dev);
 	int ret = 0;
 
 	/* Use the same mutex which was used everywhere to protect power-op */
 	mutex_lock(&indio_dev->mlock);
 	if (!st->powerup_count) {
-		ret = inv_mpu6050_write_reg_unlocked(client,
-						     st->reg->pwr_mgmt_1, 0);
+		ret = regmap_write(st->map, st->reg->pwr_mgmt_1, 0);
 		if (ret)
 			goto write_error;
 
@@ -73,10 +42,9 @@
 	}
 	if (!ret) {
 		st->powerup_count++;
-		ret = inv_mpu6050_write_reg_unlocked(client,
-						     st->reg->int_pin_cfg,
-						     INV_MPU6050_INT_PIN_CFG |
-						     INV_MPU6050_BIT_BYPASS_EN);
+		ret = regmap_write(st->map, st->reg->int_pin_cfg,
+				   INV_MPU6050_INT_PIN_CFG |
+				   INV_MPU6050_BIT_BYPASS_EN);
 	}
 write_error:
 	mutex_unlock(&indio_dev->mlock);
@@ -84,21 +52,18 @@
 	return ret;
 }
 
-static int inv_mpu6050_deselect_bypass(struct i2c_adapter *adap,
-				       void *mux_priv, u32 chan_id)
+static int inv_mpu6050_deselect_bypass(struct i2c_mux_core *muxc, u32 chan_id)
 {
-	struct i2c_client *client = mux_priv;
-	struct iio_dev *indio_dev = dev_get_drvdata(&client->dev);
+	struct iio_dev *indio_dev = i2c_mux_priv(muxc);
 	struct inv_mpu6050_state *st = iio_priv(indio_dev);
 
 	mutex_lock(&indio_dev->mlock);
 	/* It doesn't really mattter, if any of the calls fails */
-	inv_mpu6050_write_reg_unlocked(client, st->reg->int_pin_cfg,
-				       INV_MPU6050_INT_PIN_CFG);
+	regmap_write(st->map, st->reg->int_pin_cfg, INV_MPU6050_INT_PIN_CFG);
 	st->powerup_count--;
 	if (!st->powerup_count)
-		inv_mpu6050_write_reg_unlocked(client, st->reg->pwr_mgmt_1,
-					       INV_MPU6050_BIT_SLEEP);
+		regmap_write(st->map, st->reg->pwr_mgmt_1,
+			     INV_MPU6050_BIT_SLEEP);
 	mutex_unlock(&indio_dev->mlock);
 
 	return 0;
@@ -160,16 +125,18 @@
 		return result;
 
 	st = iio_priv(dev_get_drvdata(&client->dev));
-	st->mux_adapter = i2c_add_mux_adapter(client->adapter,
-					      &client->dev,
-					      client,
-					      0, 0, 0,
-					      inv_mpu6050_select_bypass,
-					      inv_mpu6050_deselect_bypass);
-	if (!st->mux_adapter) {
-		result = -ENODEV;
+	st->muxc = i2c_mux_alloc(client->adapter, &client->dev,
+				 1, 0, I2C_MUX_LOCKED,
+				 inv_mpu6050_select_bypass,
+				 inv_mpu6050_deselect_bypass);
+	if (!st->muxc) {
+		result = -ENOMEM;
 		goto out_unreg_device;
 	}
+	st->muxc->priv = dev_get_drvdata(&client->dev);
+	result = i2c_mux_add_adapter(st->muxc, 0, 0, 0);
+	if (result)
+		goto out_unreg_device;
 
 	result = inv_mpu_acpi_create_mux_client(client);
 	if (result)
@@ -178,7 +145,7 @@
 	return 0;
 
 out_del_mux:
-	i2c_del_mux_adapter(st->mux_adapter);
+	i2c_mux_del_adapters(st->muxc);
 out_unreg_device:
 	inv_mpu_core_remove(&client->dev);
 	return result;
@@ -190,7 +157,7 @@
 	struct inv_mpu6050_state *st = iio_priv(indio_dev);
 
 	inv_mpu_acpi_delete_mux_client(client);
-	i2c_del_mux_adapter(st->mux_adapter);
+	i2c_mux_del_adapters(st->muxc);
 
 	return inv_mpu_core_remove(&client->dev);
 }
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
index e302a49..bb3cef6 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
@@ -11,6 +11,7 @@
 * GNU General Public License for more details.
 */
 #include <linux/i2c.h>
+#include <linux/i2c-mux.h>
 #include <linux/kfifo.h>
 #include <linux/spinlock.h>
 #include <linux/iio/iio.h>
@@ -127,7 +128,7 @@
 	const struct inv_mpu6050_hw *hw;
 	enum   inv_devices chip_type;
 	spinlock_t time_stamp_lock;
-	struct i2c_adapter *mux_adapter;
+	struct i2c_mux_core *muxc;
 	struct i2c_client *mux_client;
 	unsigned int powerup_count;
 	struct inv_mpu6050_platform_data plat_data;
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dd1dc39..ad08603 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -76,8 +76,7 @@
 
 config FSL_PAMU
 	bool "Freescale IOMMU support"
-	depends on PPC32
-	depends on PPC_E500MC || COMPILE_TEST
+	depends on PPC_E500MC || (COMPILE_TEST && PPC)
 	select IOMMU_API
 	select GENERIC_ALLOCATOR
 	help
@@ -124,16 +123,6 @@
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
-config AMD_IOMMU_STATS
-	bool "Export AMD IOMMU statistics to debugfs"
-	depends on AMD_IOMMU
-	select DEBUG_FS
-	---help---
-	  This option enables code in the AMD IOMMU driver to collect various
-	  statistics about whats happening in the driver and exports that
-	  information to userspace via debugfs.
-	  If unsure, say N.
-
 config AMD_IOMMU_V2
 	tristate "AMD IOMMU Version 2 driver"
 	depends on AMD_IOMMU
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 5efadad..634f636 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -19,6 +19,8 @@
 
 #include <linux/ratelimit.h>
 #include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/amba/bus.h>
 #include <linux/pci-ats.h>
 #include <linux/bitmap.h>
 #include <linux/slab.h>
@@ -72,6 +74,7 @@
 
 LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
+LIST_HEAD(acpihid_map);
 
 /*
  * Domain for untranslated devices - only allocated
@@ -162,18 +165,65 @@
  *
  ****************************************************************************/
 
-static struct protection_domain *to_pdomain(struct iommu_domain *dom)
+static inline int match_hid_uid(struct device *dev,
+				struct acpihid_map_entry *entry)
 {
-	return container_of(dom, struct protection_domain, domain);
+	const char *hid, *uid;
+
+	hid = acpi_device_hid(ACPI_COMPANION(dev));
+	uid = acpi_device_uid(ACPI_COMPANION(dev));
+
+	if (!hid || !(*hid))
+		return -ENODEV;
+
+	if (!uid || !(*uid))
+		return strcmp(hid, entry->hid);
+
+	if (!(*entry->uid))
+		return strcmp(hid, entry->hid);
+
+	return (strcmp(hid, entry->hid) || strcmp(uid, entry->uid));
 }
 
-static inline u16 get_device_id(struct device *dev)
+static inline u16 get_pci_device_id(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 
 	return PCI_DEVID(pdev->bus->number, pdev->devfn);
 }
 
+static inline int get_acpihid_device_id(struct device *dev,
+					struct acpihid_map_entry **entry)
+{
+	struct acpihid_map_entry *p;
+
+	list_for_each_entry(p, &acpihid_map, list) {
+		if (!match_hid_uid(dev, p)) {
+			if (entry)
+				*entry = p;
+			return p->devid;
+		}
+	}
+	return -EINVAL;
+}
+
+static inline int get_device_id(struct device *dev)
+{
+	int devid;
+
+	if (dev_is_pci(dev))
+		devid = get_pci_device_id(dev);
+	else
+		devid = get_acpihid_device_id(dev, NULL);
+
+	return devid;
+}
+
+static struct protection_domain *to_pdomain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct protection_domain, domain);
+}
+
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
@@ -222,6 +272,7 @@
 	struct pci_dev *pdev = to_pci_dev(dev);
 	u16 devid, ivrs_alias, pci_alias;
 
+	/* The callers make sure that get_device_id() does not fail here */
 	devid = get_device_id(dev);
 	ivrs_alias = amd_iommu_alias_table[devid];
 	pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
@@ -263,8 +314,7 @@
 	 */
 	if (pci_alias == devid &&
 	    PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
-		pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
-		pdev->dma_alias_devfn = ivrs_alias & 0xff;
+		pci_add_dma_alias(pdev, ivrs_alias & 0xff);
 		pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
 			PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
 			dev_name(dev));
@@ -290,6 +340,29 @@
 	return dev->archdata.iommu;
 }
 
+/*
+* Find or create an IOMMU group for an acpihid device.
+*/
+static struct iommu_group *acpihid_device_group(struct device *dev)
+{
+	struct acpihid_map_entry *p, *entry = NULL;
+	int devid;
+
+	devid = get_acpihid_device_id(dev, &entry);
+	if (devid < 0)
+		return ERR_PTR(devid);
+
+	list_for_each_entry(p, &acpihid_map, list) {
+		if ((devid == p->devid) && p->group)
+			entry->group = p->group;
+	}
+
+	if (!entry->group)
+		entry->group = generic_device_group(dev);
+
+	return entry->group;
+}
+
 static bool pci_iommuv2_capable(struct pci_dev *pdev)
 {
 	static const int caps[] = {
@@ -341,9 +414,11 @@
 					   struct dma_ops_domain *dma_dom)
 {
 	struct unity_map_entry *e;
-	u16 devid;
+	int devid;
 
 	devid = get_device_id(dev);
+	if (devid < 0)
+		return;
 
 	list_for_each_entry(e, &amd_iommu_unity_map, list) {
 		if (!(devid >= e->devid_start && devid <= e->devid_end))
@@ -358,16 +433,14 @@
  */
 static bool check_device(struct device *dev)
 {
-	u16 devid;
+	int devid;
 
 	if (!dev || !dev->dma_mask)
 		return false;
 
-	/* No PCI device */
-	if (!dev_is_pci(dev))
-		return false;
-
 	devid = get_device_id(dev);
+	if (devid < 0)
+		return false;
 
 	/* Out of our scope? */
 	if (devid > amd_iommu_last_bdf)
@@ -402,22 +475,26 @@
 
 static int iommu_init_device(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
 	struct iommu_dev_data *dev_data;
+	int devid;
 
 	if (dev->archdata.iommu)
 		return 0;
 
-	dev_data = find_dev_data(get_device_id(dev));
+	devid = get_device_id(dev);
+	if (devid < 0)
+		return devid;
+
+	dev_data = find_dev_data(devid);
 	if (!dev_data)
 		return -ENOMEM;
 
 	dev_data->alias = get_alias(dev);
 
-	if (pci_iommuv2_capable(pdev)) {
+	if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
 		struct amd_iommu *iommu;
 
-		iommu              = amd_iommu_rlookup_table[dev_data->devid];
+		iommu = amd_iommu_rlookup_table[dev_data->devid];
 		dev_data->iommu_v2 = iommu->is_iommu_v2;
 	}
 
@@ -431,9 +508,13 @@
 
 static void iommu_ignore_device(struct device *dev)
 {
-	u16 devid, alias;
+	u16 alias;
+	int devid;
 
 	devid = get_device_id(dev);
+	if (devid < 0)
+		return;
+
 	alias = get_alias(dev);
 
 	memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
@@ -445,8 +526,14 @@
 
 static void iommu_uninit_device(struct device *dev)
 {
-	struct iommu_dev_data *dev_data = search_dev_data(get_device_id(dev));
+	int devid;
+	struct iommu_dev_data *dev_data;
 
+	devid = get_device_id(dev);
+	if (devid < 0)
+		return;
+
+	dev_data = search_dev_data(devid);
 	if (!dev_data)
 		return;
 
@@ -467,70 +554,6 @@
 	 */
 }
 
-#ifdef CONFIG_AMD_IOMMU_STATS
-
-/*
- * Initialization code for statistics collection
- */
-
-DECLARE_STATS_COUNTER(compl_wait);
-DECLARE_STATS_COUNTER(cnt_map_single);
-DECLARE_STATS_COUNTER(cnt_unmap_single);
-DECLARE_STATS_COUNTER(cnt_map_sg);
-DECLARE_STATS_COUNTER(cnt_unmap_sg);
-DECLARE_STATS_COUNTER(cnt_alloc_coherent);
-DECLARE_STATS_COUNTER(cnt_free_coherent);
-DECLARE_STATS_COUNTER(cross_page);
-DECLARE_STATS_COUNTER(domain_flush_single);
-DECLARE_STATS_COUNTER(domain_flush_all);
-DECLARE_STATS_COUNTER(alloced_io_mem);
-DECLARE_STATS_COUNTER(total_map_requests);
-DECLARE_STATS_COUNTER(complete_ppr);
-DECLARE_STATS_COUNTER(invalidate_iotlb);
-DECLARE_STATS_COUNTER(invalidate_iotlb_all);
-DECLARE_STATS_COUNTER(pri_requests);
-
-static struct dentry *stats_dir;
-static struct dentry *de_fflush;
-
-static void amd_iommu_stats_add(struct __iommu_counter *cnt)
-{
-	if (stats_dir == NULL)
-		return;
-
-	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
-				       &cnt->value);
-}
-
-static void amd_iommu_stats_init(void)
-{
-	stats_dir = debugfs_create_dir("amd-iommu", NULL);
-	if (stats_dir == NULL)
-		return;
-
-	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
-					 &amd_iommu_unmap_flush);
-
-	amd_iommu_stats_add(&compl_wait);
-	amd_iommu_stats_add(&cnt_map_single);
-	amd_iommu_stats_add(&cnt_unmap_single);
-	amd_iommu_stats_add(&cnt_map_sg);
-	amd_iommu_stats_add(&cnt_unmap_sg);
-	amd_iommu_stats_add(&cnt_alloc_coherent);
-	amd_iommu_stats_add(&cnt_free_coherent);
-	amd_iommu_stats_add(&cross_page);
-	amd_iommu_stats_add(&domain_flush_single);
-	amd_iommu_stats_add(&domain_flush_all);
-	amd_iommu_stats_add(&alloced_io_mem);
-	amd_iommu_stats_add(&total_map_requests);
-	amd_iommu_stats_add(&complete_ppr);
-	amd_iommu_stats_add(&invalidate_iotlb);
-	amd_iommu_stats_add(&invalidate_iotlb_all);
-	amd_iommu_stats_add(&pri_requests);
-}
-
-#endif
-
 /****************************************************************************
  *
  * Interrupt handling functions
@@ -653,8 +676,6 @@
 {
 	struct amd_iommu_fault fault;
 
-	INC_STATS_COUNTER(pri_requests);
-
 	if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
 		pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
 		return;
@@ -2284,13 +2305,17 @@
 static int attach_device(struct device *dev,
 			 struct protection_domain *domain)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_dev *pdev;
 	struct iommu_dev_data *dev_data;
 	unsigned long flags;
 	int ret;
 
 	dev_data = get_dev_data(dev);
 
+	if (!dev_is_pci(dev))
+		goto skip_ats_check;
+
+	pdev = to_pci_dev(dev);
 	if (domain->flags & PD_IOMMUV2_MASK) {
 		if (!dev_data->passthrough)
 			return -EINVAL;
@@ -2309,6 +2334,7 @@
 		dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
 	}
 
+skip_ats_check:
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	ret = __attach_device(dev_data, domain);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -2365,6 +2391,9 @@
 	__detach_device(dev_data);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
+	if (!dev_is_pci(dev))
+		return;
+
 	if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
 		pdev_iommuv2_disable(to_pci_dev(dev));
 	else if (dev_data->ats.enabled)
@@ -2378,13 +2407,15 @@
 	struct iommu_dev_data *dev_data;
 	struct iommu_domain *domain;
 	struct amd_iommu *iommu;
-	u16 devid;
-	int ret;
+	int ret, devid;
 
 	if (!check_device(dev) || get_dev_data(dev))
 		return 0;
 
 	devid = get_device_id(dev);
+	if (devid < 0)
+		return devid;
+
 	iommu = amd_iommu_rlookup_table[devid];
 
 	ret = iommu_init_device(dev);
@@ -2422,18 +2453,29 @@
 static void amd_iommu_remove_device(struct device *dev)
 {
 	struct amd_iommu *iommu;
-	u16 devid;
+	int devid;
 
 	if (!check_device(dev))
 		return;
 
 	devid = get_device_id(dev);
+	if (devid < 0)
+		return;
+
 	iommu = amd_iommu_rlookup_table[devid];
 
 	iommu_uninit_device(dev);
 	iommu_completion_wait(iommu);
 }
 
+static struct iommu_group *amd_iommu_device_group(struct device *dev)
+{
+	if (dev_is_pci(dev))
+		return pci_device_group(dev);
+
+	return acpihid_device_group(dev);
+}
+
 /*****************************************************************************
  *
  * The next functions belong to the dma_ops mapping/unmapping code.
@@ -2598,11 +2640,6 @@
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
-	INC_STATS_COUNTER(total_map_requests);
-
-	if (pages > 1)
-		INC_STATS_COUNTER(cross_page);
-
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
@@ -2623,8 +2660,6 @@
 	}
 	address += offset;
 
-	ADD_STATS_COUNTER(alloced_io_mem, size);
-
 	if (unlikely(amd_iommu_np_cache)) {
 		domain_flush_pages(&dma_dom->domain, address, size);
 		domain_flush_complete(&dma_dom->domain);
@@ -2672,8 +2707,6 @@
 		start += PAGE_SIZE;
 	}
 
-	SUB_STATS_COUNTER(alloced_io_mem, size);
-
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 }
 
@@ -2689,8 +2722,6 @@
 	struct protection_domain *domain;
 	u64 dma_mask;
 
-	INC_STATS_COUNTER(cnt_map_single);
-
 	domain = get_domain(dev);
 	if (PTR_ERR(domain) == -EINVAL)
 		return (dma_addr_t)paddr;
@@ -2711,8 +2742,6 @@
 {
 	struct protection_domain *domain;
 
-	INC_STATS_COUNTER(cnt_unmap_single);
-
 	domain = get_domain(dev);
 	if (IS_ERR(domain))
 		return;
@@ -2735,8 +2764,6 @@
 	int mapped_elems = 0;
 	u64 dma_mask;
 
-	INC_STATS_COUNTER(cnt_map_sg);
-
 	domain = get_domain(dev);
 	if (IS_ERR(domain))
 		return 0;
@@ -2782,8 +2809,6 @@
 	struct scatterlist *s;
 	int i;
 
-	INC_STATS_COUNTER(cnt_unmap_sg);
-
 	domain = get_domain(dev);
 	if (IS_ERR(domain))
 		return;
@@ -2806,8 +2831,6 @@
 	struct protection_domain *domain;
 	struct page *page;
 
-	INC_STATS_COUNTER(cnt_alloc_coherent);
-
 	domain = get_domain(dev);
 	if (PTR_ERR(domain) == -EINVAL) {
 		page = alloc_pages(flag, get_order(size));
@@ -2861,8 +2884,6 @@
 	struct protection_domain *domain;
 	struct page *page;
 
-	INC_STATS_COUNTER(cnt_free_coherent);
-
 	page = virt_to_page(virt_addr);
 	size = PAGE_ALIGN(size);
 
@@ -2927,7 +2948,17 @@
 
 int __init amd_iommu_init_api(void)
 {
-	return bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
+	int err = 0;
+
+	err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
+	if (err)
+		return err;
+#ifdef CONFIG_ARM_AMBA
+	err = bus_set_iommu(&amba_bustype, &amd_iommu_ops);
+	if (err)
+		return err;
+#endif
+	return 0;
 }
 
 int __init amd_iommu_init_dma_ops(void)
@@ -2944,8 +2975,6 @@
 	if (!swiotlb)
 		dma_ops = &nommu_dma_ops;
 
-	amd_iommu_stats_init();
-
 	if (amd_iommu_unmap_flush)
 		pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
 	else
@@ -3099,12 +3128,14 @@
 {
 	struct iommu_dev_data *dev_data = dev->archdata.iommu;
 	struct amd_iommu *iommu;
-	u16 devid;
+	int devid;
 
 	if (!check_device(dev))
 		return;
 
 	devid = get_device_id(dev);
+	if (devid < 0)
+		return;
 
 	if (dev_data->domain != NULL)
 		detach_device(dev);
@@ -3222,9 +3253,11 @@
 				     struct list_head *head)
 {
 	struct unity_map_entry *entry;
-	u16 devid;
+	int devid;
 
 	devid = get_device_id(dev);
+	if (devid < 0)
+		return;
 
 	list_for_each_entry(entry, &amd_iommu_unity_map, list) {
 		struct iommu_dm_region *region;
@@ -3271,7 +3304,7 @@
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.add_device = amd_iommu_add_device,
 	.remove_device = amd_iommu_remove_device,
-	.device_group = pci_device_group,
+	.device_group = amd_iommu_device_group,
 	.get_dm_regions = amd_iommu_get_dm_regions,
 	.put_dm_regions = amd_iommu_put_dm_regions,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
@@ -3432,8 +3465,6 @@
 static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
 				  u64 address)
 {
-	INC_STATS_COUNTER(invalidate_iotlb);
-
 	return __flush_pasid(domain, pasid, address, false);
 }
 
@@ -3454,8 +3485,6 @@
 
 static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
 {
-	INC_STATS_COUNTER(invalidate_iotlb_all);
-
 	return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 			     true);
 }
@@ -3575,8 +3604,6 @@
 	struct amd_iommu *iommu;
 	struct iommu_cmd cmd;
 
-	INC_STATS_COUNTER(complete_ppr);
-
 	dev_data = get_dev_data(&pdev->dev);
 	iommu    = amd_iommu_rlookup_table[dev_data->devid];
 
@@ -3926,6 +3953,9 @@
 	case X86_IRQ_ALLOC_TYPE_MSI:
 	case X86_IRQ_ALLOC_TYPE_MSIX:
 		devid = get_device_id(&info->msi_dev->dev);
+		if (devid < 0)
+			return NULL;
+
 		iommu = amd_iommu_rlookup_table[devid];
 		if (iommu)
 			return iommu->msi_domain;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index bf4959f..9e00341 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -44,7 +44,7 @@
  */
 #define IVRS_HEADER_LENGTH 48
 
-#define ACPI_IVHD_TYPE                  0x10
+#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
 #define ACPI_IVMD_TYPE_ALL              0x20
 #define ACPI_IVMD_TYPE                  0x21
 #define ACPI_IVMD_TYPE_RANGE            0x22
@@ -58,6 +58,11 @@
 #define IVHD_DEV_EXT_SELECT             0x46
 #define IVHD_DEV_EXT_SELECT_RANGE       0x47
 #define IVHD_DEV_SPECIAL		0x48
+#define IVHD_DEV_ACPI_HID		0xf0
+
+#define UID_NOT_PRESENT                 0
+#define UID_IS_INTEGER                  1
+#define UID_IS_CHARACTER                2
 
 #define IVHD_SPECIAL_IOAPIC		1
 #define IVHD_SPECIAL_HPET		2
@@ -99,7 +104,11 @@
 	u64 mmio_phys;
 	u16 pci_seg;
 	u16 info;
-	u32 efr;
+	u32 efr_attr;
+
+	/* Following only valid on IVHD type 11h and 40h */
+	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
+	u64 res;
 } __attribute__((packed));
 
 /*
@@ -111,6 +120,11 @@
 	u16 devid;
 	u8 flags;
 	u32 ext;
+	u32 hidh;
+	u64 cid;
+	u8 uidf;
+	u8 uidl;
+	u8 uid;
 } __attribute__((packed));
 
 /*
@@ -133,6 +147,7 @@
 
 static bool amd_iommu_detected;
 static bool __initdata amd_iommu_disabled;
+static int amd_iommu_target_ivhd_type;
 
 u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 					   to handle */
@@ -218,8 +233,12 @@
 #define EARLY_MAP_SIZE		4
 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
+static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
+
 static int __initdata early_ioapic_map_size;
 static int __initdata early_hpet_map_size;
+static int __initdata early_acpihid_map_size;
+
 static bool __initdata cmdline_maps;
 
 static enum iommu_init_state init_state = IOMMU_START_STATE;
@@ -394,6 +413,22 @@
 	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
 }
 
+static inline u32 get_ivhd_header_size(struct ivhd_header *h)
+{
+	u32 size = 0;
+
+	switch (h->type) {
+	case 0x10:
+		size = 24;
+		break;
+	case 0x11:
+	case 0x40:
+		size = 40;
+		break;
+	}
+	return size;
+}
+
 /****************************************************************************
  *
  * The functions below belong to the first pass of AMD IOMMU ACPI table
@@ -408,7 +443,15 @@
  */
 static inline int ivhd_entry_length(u8 *ivhd)
 {
-	return 0x04 << (*ivhd >> 6);
+	u32 type = ((struct ivhd_entry *)ivhd)->type;
+
+	if (type < 0x80) {
+		return 0x04 << (*ivhd >> 6);
+	} else if (type == IVHD_DEV_ACPI_HID) {
+		/* For ACPI_HID, offset 21 is uid len */
+		return *((u8 *)ivhd + 21) + 22;
+	}
+	return 0;
 }
 
 /*
@@ -420,7 +463,14 @@
 	u8 *p = (void *)h, *end = (void *)h;
 	struct ivhd_entry *dev;
 
-	p += sizeof(*h);
+	u32 ivhd_size = get_ivhd_header_size(h);
+
+	if (!ivhd_size) {
+		pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
+		return -EINVAL;
+	}
+
+	p += ivhd_size;
 	end += h->length;
 
 	while (p < end) {
@@ -448,6 +498,22 @@
 	return 0;
 }
 
+static int __init check_ivrs_checksum(struct acpi_table_header *table)
+{
+	int i;
+	u8 checksum = 0, *p = (u8 *)table;
+
+	for (i = 0; i < table->length; ++i)
+		checksum += p[i];
+	if (checksum != 0) {
+		/* ACPI table corrupt */
+		pr_err(FW_BUG "AMD-Vi: IVRS invalid checksum\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 /*
  * Iterate over all IVHD entries in the ACPI table and find the highest device
  * id which we need to handle. This is the first of three functions which parse
@@ -455,31 +521,19 @@
  */
 static int __init find_last_devid_acpi(struct acpi_table_header *table)
 {
-	int i;
-	u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
+	u8 *p = (u8 *)table, *end = (u8 *)table;
 	struct ivhd_header *h;
 
-	/*
-	 * Validate checksum here so we don't need to do it when
-	 * we actually parse the table
-	 */
-	for (i = 0; i < table->length; ++i)
-		checksum += p[i];
-	if (checksum != 0)
-		/* ACPI table corrupt */
-		return -ENODEV;
-
 	p += IVRS_HEADER_LENGTH;
 
 	end += table->length;
 	while (p < end) {
 		h = (struct ivhd_header *)p;
-		switch (h->type) {
-		case ACPI_IVHD_TYPE:
-			find_last_devid_from_ivhd(h);
-			break;
-		default:
-			break;
+		if (h->type == amd_iommu_target_ivhd_type) {
+			int ret = find_last_devid_from_ivhd(h);
+
+			if (ret)
+				return ret;
 		}
 		p += h->length;
 	}
@@ -724,6 +778,42 @@
 	return 0;
 }
 
+static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
+				      bool cmd_line)
+{
+	struct acpihid_map_entry *entry;
+	struct list_head *list = &acpihid_map;
+
+	list_for_each_entry(entry, list, list) {
+		if (strcmp(entry->hid, hid) ||
+		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
+		    !entry->cmd_line)
+			continue;
+
+		pr_info("AMD-Vi: Command-line override for hid:%s uid:%s\n",
+			hid, uid);
+		*devid = entry->devid;
+		return 0;
+	}
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	memcpy(entry->uid, uid, strlen(uid));
+	memcpy(entry->hid, hid, strlen(hid));
+	entry->devid = *devid;
+	entry->cmd_line	= cmd_line;
+	entry->root_devid = (entry->devid & (~0x7));
+
+	pr_info("AMD-Vi:%s, add hid:%s, uid:%s, rdevid:%d\n",
+		entry->cmd_line ? "cmd" : "ivrs",
+		entry->hid, entry->uid, entry->root_devid);
+
+	list_add_tail(&entry->list, list);
+	return 0;
+}
+
 static int __init add_early_maps(void)
 {
 	int i, ret;
@@ -746,6 +836,15 @@
 			return ret;
 	}
 
+	for (i = 0; i < early_acpihid_map_size; ++i) {
+		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
+					  early_acpihid_map[i].uid,
+					  &early_acpihid_map[i].devid,
+					  early_acpihid_map[i].cmd_line);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -785,6 +884,7 @@
 	u32 dev_i, ext_flags = 0;
 	bool alias = false;
 	struct ivhd_entry *e;
+	u32 ivhd_size;
 	int ret;
 
 
@@ -800,7 +900,14 @@
 	/*
 	 * Done. Now parse the device entries
 	 */
-	p += sizeof(struct ivhd_header);
+	ivhd_size = get_ivhd_header_size(h);
+	if (!ivhd_size) {
+		pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
+		return -EINVAL;
+	}
+
+	p += ivhd_size;
+
 	end += h->length;
 
 
@@ -958,6 +1065,70 @@
 
 			break;
 		}
+		case IVHD_DEV_ACPI_HID: {
+			u16 devid;
+			u8 hid[ACPIHID_HID_LEN] = {0};
+			u8 uid[ACPIHID_UID_LEN] = {0};
+			int ret;
+
+			if (h->type != 0x40) {
+				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
+				       e->type);
+				break;
+			}
+
+			memcpy(hid, (u8 *)(&e->ext), ACPIHID_HID_LEN - 1);
+			hid[ACPIHID_HID_LEN - 1] = '\0';
+
+			if (!(*hid)) {
+				pr_err(FW_BUG "Invalid HID.\n");
+				break;
+			}
+
+			switch (e->uidf) {
+			case UID_NOT_PRESENT:
+
+				if (e->uidl != 0)
+					pr_warn(FW_BUG "Invalid UID length.\n");
+
+				break;
+			case UID_IS_INTEGER:
+
+				sprintf(uid, "%d", e->uid);
+
+				break;
+			case UID_IS_CHARACTER:
+
+				memcpy(uid, (u8 *)(&e->uid), ACPIHID_UID_LEN - 1);
+				uid[ACPIHID_UID_LEN - 1] = '\0';
+
+				break;
+			default:
+				break;
+			}
+
+			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
+				    hid, uid,
+				    PCI_BUS_NUM(devid),
+				    PCI_SLOT(devid),
+				    PCI_FUNC(devid));
+
+			devid  = e->devid;
+			flags = e->flags;
+
+			ret = add_acpi_hid_device(hid, uid, &devid, false);
+			if (ret)
+				return ret;
+
+			/*
+			 * add_acpi_hid_device might update the devid in case a
+			 * command-line override is present. So call
+			 * set_dev_entry_from_acpi after add_acpi_hid_device.
+			 */
+			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
+
+			break;
+		}
 		default:
 			break;
 		}
@@ -1078,13 +1249,25 @@
 	iommu->pci_seg = h->pci_seg;
 	iommu->mmio_phys = h->mmio_phys;
 
-	/* Check if IVHD EFR contains proper max banks/counters */
-	if ((h->efr != 0) &&
-	    ((h->efr & (0xF << 13)) != 0) &&
-	    ((h->efr & (0x3F << 17)) != 0)) {
-		iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
-	} else {
-		iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
+	switch (h->type) {
+	case 0x10:
+		/* Check if IVHD EFR contains proper max banks/counters */
+		if ((h->efr_attr != 0) &&
+		    ((h->efr_attr & (0xF << 13)) != 0) &&
+		    ((h->efr_attr & (0x3F << 17)) != 0))
+			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
+		else
+			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
+		break;
+	case 0x11:
+	case 0x40:
+		if (h->efr_reg & (1 << 9))
+			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
+		else
+			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
@@ -1117,6 +1300,32 @@
 	return 0;
 }
 
+/**
+ * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
+ * @ivrs:         Pointer to the IVRS header
+ *
+ * This function searches through all IVDBs for the maximum supported IVHD type
+ */
+static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
+{
+	u8 *base = (u8 *)ivrs;
+	struct ivhd_header *ivhd = (struct ivhd_header *)
+					(base + IVRS_HEADER_LENGTH);
+	u8 last_type = ivhd->type;
+	u16 devid = ivhd->devid;
+
+	while (((u8 *)ivhd - base < ivrs->length) &&
+	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
+		u8 *p = (u8 *) ivhd;
+
+		if (ivhd->devid == devid)
+			last_type = ivhd->type;
+		ivhd = (struct ivhd_header *)(p + ivhd->length);
+	}
+
+	return last_type;
+}
+
 /*
  * Iterates over all IOMMU entries in the ACPI table, allocates the
  * IOMMU structure and initializes it with init_iommu_one()
@@ -1133,8 +1342,7 @@
 
 	while (p < end) {
 		h = (struct ivhd_header *)p;
-		switch (*p) {
-		case ACPI_IVHD_TYPE:
+		if (*p == amd_iommu_target_ivhd_type) {
 
 			DUMP_printk("device: %02x:%02x.%01x cap: %04x "
 				    "seg: %d flags: %01x info %04x\n",
@@ -1151,9 +1359,6 @@
 			ret = init_iommu_one(iommu, h);
 			if (ret)
 				return ret;
-			break;
-		default:
-			break;
 		}
 		p += h->length;
 
@@ -1818,18 +2023,20 @@
  * remapping setup code.
  *
  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
- * three times:
+ * four times:
  *
- *	1 pass) Find the highest PCI device id the driver has to handle.
+ *	1 pass) Discover the most comprehensive IVHD type to use.
+ *
+ *	2 pass) Find the highest PCI device id the driver has to handle.
  *		Upon this information the size of the data structures is
  *		determined that needs to be allocated.
  *
- *	2 pass) Initialize the data structures just allocated with the
+ *	3 pass) Initialize the data structures just allocated with the
  *		information in the ACPI table about available AMD IOMMUs
  *		in the system. It also maps the PCI devices in the
  *		system to specific IOMMUs
  *
- *	3 pass) After the basic data structures are allocated and
+ *	4 pass) After the basic data structures are allocated and
  *		initialized we update them with information about memory
  *		remapping requirements parsed out of the ACPI table in
  *		this last pass.
@@ -1857,6 +2064,17 @@
 	}
 
 	/*
+	 * Validate checksum here so we don't need to do it when
+	 * we actually parse the table
+	 */
+	ret = check_ivrs_checksum(ivrs_base);
+	if (ret)
+		return ret;
+
+	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
+	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
+
+	/*
 	 * First parse ACPI tables to find the largest Bus/Dev/Func
 	 * we need to handle. Upon this information the shared data
 	 * structures for the IOMMUs in the system will be allocated
@@ -2259,10 +2477,43 @@
 	return 1;
 }
 
+static int __init parse_ivrs_acpihid(char *str)
+{
+	u32 bus, dev, fn;
+	char *hid, *uid, *p;
+	char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+	int ret, i;
+
+	ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
+	if (ret != 4) {
+		pr_err("AMD-Vi: Invalid command line: ivrs_acpihid(%s)\n", str);
+		return 1;
+	}
+
+	p = acpiid;
+	hid = strsep(&p, ":");
+	uid = p;
+
+	if (!hid || !(*hid) || !uid) {
+		pr_err("AMD-Vi: Invalid command line: hid or uid\n");
+		return 1;
+	}
+
+	i = early_acpihid_map_size++;
+	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
+	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
+	early_acpihid_map[i].devid =
+		((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
+	early_acpihid_map[i].cmd_line	= true;
+
+	return 1;
+}
+
 __setup("amd_iommu_dump",	parse_amd_iommu_dump);
 __setup("amd_iommu=",		parse_amd_iommu_options);
 __setup("ivrs_ioapic",		parse_ivrs_ioapic);
 __setup("ivrs_hpet",		parse_ivrs_hpet);
+__setup("ivrs_acpihid",		parse_ivrs_acpihid);
 
 IOMMU_INIT_FINISH(amd_iommu_detect,
 		  gart_iommu_hole_init,
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 9d32b20..590956a 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -527,6 +527,19 @@
 #endif
 };
 
+#define ACPIHID_UID_LEN 256
+#define ACPIHID_HID_LEN 9
+
+struct acpihid_map_entry {	/* one ACPI HID:UID to IOMMU device-id mapping */
+	struct list_head list;	/* presumably links into acpihid_map - TODO confirm */
+	u8 uid[ACPIHID_UID_LEN];	/* UID text: the part after ':' in ivrs_acpihid */
+	u8 hid[ACPIHID_HID_LEN];	/* HID text: the part before ':' in ivrs_acpihid */
+	u16 devid;	/* (bus << 8) | (dev << 3) | fn, see parse_ivrs_acpihid() */
+	u16 root_devid;
+	bool cmd_line;	/* set to true by parse_ivrs_acpihid() for command-line entries */
+	struct iommu_group *group;
+};
+
 struct devid_map {
 	struct list_head list;
 	u8 id;
@@ -537,6 +550,7 @@
 /* Map HPET and IOAPIC ids to the devid used by the IOMMU */
 extern struct list_head ioapic_map;
 extern struct list_head hpet_map;
+extern struct list_head acpihid_map;
 
 /*
  * List with all IOMMUs in the system. This list is not locked because it is
@@ -668,30 +682,4 @@
 	return -EINVAL;
 }
 
-#ifdef CONFIG_AMD_IOMMU_STATS
-
-struct __iommu_counter {
-	char *name;
-	struct dentry *dent;
-	u64 value;
-};
-
-#define DECLARE_STATS_COUNTER(nm) \
-	static struct __iommu_counter nm = {	\
-		.name = #nm,			\
-	}
-
-#define INC_STATS_COUNTER(name)		name.value += 1
-#define ADD_STATS_COUNTER(name, x)	name.value += (x)
-#define SUB_STATS_COUNTER(name, x)	name.value -= (x)
-
-#else /* CONFIG_AMD_IOMMU_STATS */
-
-#define DECLARE_STATS_COUNTER(name)
-#define INC_STATS_COUNTER(name)
-#define ADD_STATS_COUNTER(name, x)
-#define SUB_STATS_COUNTER(name, x)
-
-#endif /* CONFIG_AMD_IOMMU_STATS */
-
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 4ff73ff..ebab33e 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -590,6 +590,7 @@
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
+	unsigned long			pgsize_bitmap;
 
 #define ARM_SMMU_MAX_ASIDS		(1 << 16)
 	unsigned int			asid_bits;
@@ -1516,8 +1517,6 @@
 	return 0;
 }
 
-static struct iommu_ops arm_smmu_ops;
-
 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 {
 	int ret;
@@ -1555,7 +1554,7 @@
 	}
 
 	pgtbl_cfg = (struct io_pgtable_cfg) {
-		.pgsize_bitmap	= arm_smmu_ops.pgsize_bitmap,
+		.pgsize_bitmap	= smmu->pgsize_bitmap,
 		.ias		= ias,
 		.oas		= oas,
 		.tlb		= &arm_smmu_gather_ops,
@@ -1566,7 +1565,7 @@
 	if (!pgtbl_ops)
 		return -ENOMEM;
 
-	arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
 	smmu_domain->pgtbl_ops = pgtbl_ops;
 
 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
@@ -2410,7 +2409,6 @@
 {
 	u32 reg;
 	bool coherent;
-	unsigned long pgsize_bitmap = 0;
 
 	/* IDR0 */
 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
@@ -2541,13 +2539,16 @@
 
 	/* Page sizes */
 	if (reg & IDR5_GRAN64K)
-		pgsize_bitmap |= SZ_64K | SZ_512M;
+		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
 	if (reg & IDR5_GRAN16K)
-		pgsize_bitmap |= SZ_16K | SZ_32M;
+		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
 	if (reg & IDR5_GRAN4K)
-		pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
+		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
 
-	arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
+	if (arm_smmu_ops.pgsize_bitmap == -1UL)
+		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
+	else
+		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
 
 	/* Output address size */
 	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7c39ac4..0360919 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -34,6 +34,7 @@
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
@@ -71,16 +72,15 @@
 		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
 			? 0x400 : 0))
 
+/*
+ * Some 64-bit registers only make sense to write atomically, but in such
+ * cases all the data relevant to AArch32 formats lies within the lower word,
+ * therefore this actually makes more sense than it might first appear.
+ */
 #ifdef CONFIG_64BIT
-#define smmu_writeq	writeq_relaxed
+#define smmu_write_atomic_lq		writeq_relaxed
 #else
-#define smmu_writeq(reg64, addr)				\
-	do {							\
-		u64 __val = (reg64);				\
-		void __iomem *__addr = (addr);			\
-		writel_relaxed(__val >> 32, __addr + 4);	\
-		writel_relaxed(__val, __addr);			\
-	} while (0)
+#define smmu_write_atomic_lq		writel_relaxed
 #endif
 
 /* Configuration registers */
@@ -94,9 +94,13 @@
 #define sCR0_VMIDPNE			(1 << 11)
 #define sCR0_PTM			(1 << 12)
 #define sCR0_FB				(1 << 13)
+#define sCR0_VMID16EN			(1 << 31)
 #define sCR0_BSU_SHIFT			14
 #define sCR0_BSU_MASK			0x3
 
+/* Auxiliary Configuration register */
+#define ARM_SMMU_GR0_sACR		0x10
+
 /* Identification registers */
 #define ARM_SMMU_GR0_ID0		0x20
 #define ARM_SMMU_GR0_ID1		0x24
@@ -116,6 +120,8 @@
 #define ID0_NTS				(1 << 28)
 #define ID0_SMS				(1 << 27)
 #define ID0_ATOSNS			(1 << 26)
+#define ID0_PTFS_NO_AARCH32		(1 << 25)
+#define ID0_PTFS_NO_AARCH32S		(1 << 24)
 #define ID0_CTTW			(1 << 14)
 #define ID0_NUMIRPT_SHIFT		16
 #define ID0_NUMIRPT_MASK		0xff
@@ -141,6 +147,10 @@
 #define ID2_PTFS_4K			(1 << 12)
 #define ID2_PTFS_16K			(1 << 13)
 #define ID2_PTFS_64K			(1 << 14)
+#define ID2_VMID16			(1 << 15)
+
+#define ID7_MAJOR_SHIFT			4
+#define ID7_MAJOR_MASK			0xf
 
 /* Global TLB invalidation */
 #define ARM_SMMU_GR0_TLBIVMID		0x64
@@ -193,12 +203,15 @@
 #define ARM_SMMU_GR1_CBA2R(n)		(0x800 + ((n) << 2))
 #define CBA2R_RW64_32BIT		(0 << 0)
 #define CBA2R_RW64_64BIT		(1 << 0)
+#define CBA2R_VMID_SHIFT		16
+#define CBA2R_VMID_MASK			0xffff
 
 /* Translation context bank */
 #define ARM_SMMU_CB_BASE(smmu)		((smmu)->base + ((smmu)->size >> 1))
 #define ARM_SMMU_CB(smmu, n)		((n) * (1 << (smmu)->pgshift))
 
 #define ARM_SMMU_CB_SCTLR		0x0
+#define ARM_SMMU_CB_ACTLR		0x4
 #define ARM_SMMU_CB_RESUME		0x8
 #define ARM_SMMU_CB_TTBCR2		0x10
 #define ARM_SMMU_CB_TTBR0		0x20
@@ -206,11 +219,9 @@
 #define ARM_SMMU_CB_TTBCR		0x30
 #define ARM_SMMU_CB_S1_MAIR0		0x38
 #define ARM_SMMU_CB_S1_MAIR1		0x3c
-#define ARM_SMMU_CB_PAR_LO		0x50
-#define ARM_SMMU_CB_PAR_HI		0x54
+#define ARM_SMMU_CB_PAR			0x50
 #define ARM_SMMU_CB_FSR			0x58
-#define ARM_SMMU_CB_FAR_LO		0x60
-#define ARM_SMMU_CB_FAR_HI		0x64
+#define ARM_SMMU_CB_FAR			0x60
 #define ARM_SMMU_CB_FSYNR0		0x68
 #define ARM_SMMU_CB_S1_TLBIVA		0x600
 #define ARM_SMMU_CB_S1_TLBIASID		0x610
@@ -230,6 +241,10 @@
 #define SCTLR_M				(1 << 0)
 #define SCTLR_EAE_SBOP			(SCTLR_AFE | SCTLR_TRE)
 
+#define ARM_MMU500_ACTLR_CPRE		(1 << 1)
+
+#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
+
 #define CB_PAR_F			(1 << 0)
 
 #define ATSR_ACTIVE			(1 << 0)
@@ -270,10 +285,17 @@
 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
 enum arm_smmu_arch_version {
-	ARM_SMMU_V1 = 1,
+	ARM_SMMU_V1,
+	ARM_SMMU_V1_64K,
 	ARM_SMMU_V2,
 };
 
+enum arm_smmu_implementation {	/* model taken from the OF match data */
+	GENERIC_SMMU,	/* baseline; no quirk appears keyed on it - TODO confirm */
+	ARM_MMU500,	/* reset clears ACTLR CPRE (and ACR CACHE_LOCK on r2+) */
+	CAVIUM_SMMUV2,	/* ASID/VMID offset by cavium_id_base (erratum #27704) */
+};
+
 struct arm_smmu_smr {
 	u8				idx;
 	u16				mask;
@@ -305,11 +327,18 @@
 #define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
 #define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
+#define ARM_SMMU_FEAT_VMID16		(1 << 6)
+#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
+#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
+#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
+#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
+#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
 	u32				features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
 	u32				options;
 	enum arm_smmu_arch_version	version;
+	enum arm_smmu_implementation	model;
 
 	u32				num_context_banks;
 	u32				num_s2_context_banks;
@@ -322,6 +351,7 @@
 	unsigned long			va_size;
 	unsigned long			ipa_size;
 	unsigned long			pa_size;
+	unsigned long			pgsize_bitmap;
 
 	u32				num_global_irqs;
 	u32				num_context_irqs;
@@ -329,17 +359,27 @@
 
 	struct list_head		list;
 	struct rb_root			masters;
+
+	u32				cavium_id_base; /* Specific to Cavium */
+};
+
+enum arm_smmu_context_fmt {	/* page table format stored in arm_smmu_cfg.fmt */
+	ARM_SMMU_CTX_FMT_NONE,	/* nothing chosen; domain setup returns -EINVAL */
+	ARM_SMMU_CTX_FMT_AARCH64,	/* ARM_64_LPAE_S1/S2, CBA2R_RW64_64BIT */
+	ARM_SMMU_CTX_FMT_AARCH32_L,	/* ARM_32_LPAE_S1/S2, CBA2R_RW64_32BIT */
+	ARM_SMMU_CTX_FMT_AARCH32_S,	/* NOTE(review): never assigned in the code shown here */
+};
 
 struct arm_smmu_cfg {
 	u8				cbndx;
 	u8				irptndx;
 	u32				cbar;
+	enum arm_smmu_context_fmt	fmt;
 };
 #define INVALID_IRPTNDX			0xff
 
-#define ARM_SMMU_CB_ASID(cfg)		((cfg)->cbndx)
-#define ARM_SMMU_CB_VMID(cfg)		((cfg)->cbndx + 1)
+#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
+#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)
 
 enum arm_smmu_domain_stage {
 	ARM_SMMU_DOMAIN_S1 = 0,
@@ -357,8 +397,6 @@
 	struct iommu_domain		domain;
 };
 
-static struct iommu_ops arm_smmu_ops;
-
 static DEFINE_SPINLOCK(arm_smmu_devices_lock);
 static LIST_HEAD(arm_smmu_devices);
 
@@ -367,6 +405,8 @@
 	const char *prop;
 };
 
+static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
+
 static struct arm_smmu_option_prop arm_smmu_options[] = {
 	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
 	{ 0, NULL},
@@ -578,11 +618,11 @@
 
 	if (stage1) {
 		base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
-		writel_relaxed(ARM_SMMU_CB_ASID(cfg),
+		writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
 			       base + ARM_SMMU_CB_S1_TLBIASID);
 	} else {
 		base = ARM_SMMU_GR0(smmu);
-		writel_relaxed(ARM_SMMU_CB_VMID(cfg),
+		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
 			       base + ARM_SMMU_GR0_TLBIVMID);
 	}
 
@@ -602,37 +642,33 @@
 		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 
-		if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) {
+		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
 			iova &= ~12UL;
-			iova |= ARM_SMMU_CB_ASID(cfg);
+			iova |= ARM_SMMU_CB_ASID(smmu, cfg);
 			do {
 				writel_relaxed(iova, reg);
 				iova += granule;
 			} while (size -= granule);
-#ifdef CONFIG_64BIT
 		} else {
 			iova >>= 12;
-			iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48;
+			iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
 			do {
 				writeq_relaxed(iova, reg);
 				iova += granule >> 12;
 			} while (size -= granule);
-#endif
 		}
-#ifdef CONFIG_64BIT
 	} else if (smmu->version == ARM_SMMU_V2) {
 		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
 			      ARM_SMMU_CB_S2_TLBIIPAS2;
 		iova >>= 12;
 		do {
-			writeq_relaxed(iova, reg);
+			smmu_write_atomic_lq(iova, reg);
 			iova += granule >> 12;
 		} while (size -= granule);
-#endif
 	} else {
 		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
-		writel_relaxed(ARM_SMMU_CB_VMID(cfg), reg);
+		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
 	}
 }
 
@@ -645,7 +681,7 @@
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 {
 	int flags, ret;
-	u32 fsr, far, fsynr, resume;
+	u32 fsr, fsynr, resume;
 	unsigned long iova;
 	struct iommu_domain *domain = dev;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -667,13 +703,7 @@
 	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
 	flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
 
-	far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_LO);
-	iova = far;
-#ifdef CONFIG_64BIT
-	far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_HI);
-	iova |= ((unsigned long)far << 32);
-#endif
-
+	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
 	if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
 		ret = IRQ_HANDLED;
 		resume = RESUME_RETRY;
@@ -734,22 +764,20 @@
 	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 
 	if (smmu->version > ARM_SMMU_V1) {
-		/*
-		 * CBA2R.
-		 * *Must* be initialised before CBAR thanks to VMID16
-		 * architectural oversight affected some implementations.
-		 */
-#ifdef CONFIG_64BIT
-		reg = CBA2R_RW64_64BIT;
-#else
-		reg = CBA2R_RW64_32BIT;
-#endif
+		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+			reg = CBA2R_RW64_64BIT;
+		else
+			reg = CBA2R_RW64_32BIT;
+		/* 16-bit VMIDs live in CBA2R */
+		if (smmu->features & ARM_SMMU_FEAT_VMID16)
+			reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;
+
 		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
 	}
 
 	/* CBAR */
 	reg = cfg->cbar;
-	if (smmu->version == ARM_SMMU_V1)
+	if (smmu->version < ARM_SMMU_V2)
 		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
 
 	/*
@@ -759,8 +787,9 @@
 	if (stage1) {
 		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
 			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
-	} else {
-		reg |= ARM_SMMU_CB_VMID(cfg) << CBAR_VMID_SHIFT;
+	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
+		/* 8-bit VMIDs live in CBAR */
+		reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
 	}
 	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));
 
@@ -768,15 +797,15 @@
 	if (stage1) {
 		reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
 
-		reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
-		smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+		reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
+		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
 
 		reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-		reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
-		smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1);
+		reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
+		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
 	} else {
 		reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
-		smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
 	}
 
 	/* TTBCR */
@@ -855,16 +884,40 @@
 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
 
+	/*
+	 * Choosing a suitable context format is even more fiddly. Until we
+	 * grow some way for the caller to express a preference, and/or move
+	 * the decision into the io-pgtable code where it arguably belongs,
+	 * just aim for the closest thing to the rest of the system, and hope
+	 * that the hardware isn't esoteric enough that we can't assume AArch64
+	 * support to be a superset of AArch32 support...
+	 */
+	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
+		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
+	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
+	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
+			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
+			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
+		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
+
+	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	switch (smmu_domain->stage) {
 	case ARM_SMMU_DOMAIN_S1:
 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
 		start = smmu->num_s2_context_banks;
 		ias = smmu->va_size;
 		oas = smmu->ipa_size;
-		if (IS_ENABLED(CONFIG_64BIT))
+		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
 			fmt = ARM_64_LPAE_S1;
-		else
+		} else {
 			fmt = ARM_32_LPAE_S1;
+			ias = min(ias, 32UL);
+			oas = min(oas, 40UL);
+		}
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
 		/*
@@ -876,10 +929,13 @@
 		start = 0;
 		ias = smmu->ipa_size;
 		oas = smmu->pa_size;
-		if (IS_ENABLED(CONFIG_64BIT))
+		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
 			fmt = ARM_64_LPAE_S2;
-		else
+		} else {
 			fmt = ARM_32_LPAE_S2;
+			ias = min(ias, 40UL);
+			oas = min(oas, 40UL);
+		}
 		break;
 	default:
 		ret = -EINVAL;
@@ -892,7 +948,7 @@
 		goto out_unlock;
 
 	cfg->cbndx = ret;
-	if (smmu->version == ARM_SMMU_V1) {
+	if (smmu->version < ARM_SMMU_V2) {
 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
 		cfg->irptndx %= smmu->num_context_irqs;
 	} else {
@@ -900,7 +956,7 @@
 	}
 
 	pgtbl_cfg = (struct io_pgtable_cfg) {
-		.pgsize_bitmap	= arm_smmu_ops.pgsize_bitmap,
+		.pgsize_bitmap	= smmu->pgsize_bitmap,
 		.ias		= ias,
 		.oas		= oas,
 		.tlb		= &arm_smmu_gather_ops,
@@ -914,8 +970,8 @@
 		goto out_clear_smmu;
 	}
 
-	/* Update our support page sizes to reflect the page table format */
-	arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+	/* Update the domain's page sizes to reflect the page table format */
+	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
 
 	/* Initialise the context bank with our page table cfg */
 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
@@ -1252,8 +1308,8 @@
 	/* ATS1 registers can only be written atomically */
 	va = iova & ~0xfffUL;
 	if (smmu->version == ARM_SMMU_V2)
-		smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR);
-	else
+		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
+	else /* Register is only 32-bit in v1 */
 		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
 
 	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
@@ -1264,9 +1320,7 @@
 		return ops->iova_to_phys(ops, iova);
 	}
 
-	phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
-	phys |= ((u64)readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
-
+	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
 	if (phys & CB_PAR_F) {
 		dev_err(dev, "translation fault!\n");
 		dev_err(dev, "PAR = 0x%llx\n", phys);
@@ -1492,7 +1546,7 @@
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 	void __iomem *cb_base;
 	int i = 0;
-	u32 reg;
+	u32 reg, major;
 
 	/* clear global FSR */
 	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
@@ -1505,11 +1559,33 @@
 		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i));
 	}
 
+	/*
+	 * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK bit
+	 * of ACR must be cleared first. Note that the CACHE_LOCK
+	 * bit is only present in MMU-500r2 onwards.
+	 */
+	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
+	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
+	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
+		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
+		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
+	}
+
 	/* Make sure all context banks are disabled and clear CB_FSR  */
 	for (i = 0; i < smmu->num_context_banks; ++i) {
 		cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
 		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
 		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
+		/*
+		 * Disable MMU-500's not-particularly-beneficial next-page
+		 * prefetcher for the sake of errata #841119 and #826419.
+		 */
+		if (smmu->model == ARM_MMU500) {
+			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
+			reg &= ~ARM_MMU500_ACTLR_CPRE;
+			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
+		}
 	}
 
 	/* Invalidate the TLB, just in case */
@@ -1537,6 +1613,9 @@
 	/* Don't upgrade barriers */
 	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
 
+	if (smmu->features & ARM_SMMU_FEAT_VMID16)
+		reg |= sCR0_VMID16EN;
+
 	/* Push the button */
 	__arm_smmu_tlb_sync(smmu);
 	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
@@ -1569,7 +1648,8 @@
 	bool cttw_dt, cttw_reg;
 
 	dev_notice(smmu->dev, "probing hardware configuration...\n");
-	dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version);
+	dev_notice(smmu->dev, "SMMUv%d with:\n",
+			smmu->version == ARM_SMMU_V2 ? 2 : 1);
 
 	/* ID0 */
 	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
@@ -1601,7 +1681,8 @@
 		return -ENODEV;
 	}
 
-	if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) {
+	if ((id & ID0_S1TS) &&
+		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
 		dev_notice(smmu->dev, "\taddress translation ops\n");
 	}
@@ -1657,6 +1738,12 @@
 					   ID0_NUMSIDB_MASK;
 	}
 
+	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
+		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
+		if (!(id & ID0_PTFS_NO_AARCH32S))
+			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
+	}
+
 	/* ID1 */
 	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
 	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
@@ -1677,6 +1764,17 @@
 	}
 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
 		   smmu->num_context_banks, smmu->num_s2_context_banks);
+	/*
+	 * Cavium CN88xx erratum #27704.
+	 * Ensure ASID and VMID allocation is unique across all SMMUs in
+	 * the system.
+	 */
+	if (smmu->model == CAVIUM_SMMUV2) {
+		smmu->cavium_id_base =
+			atomic_add_return(smmu->num_context_banks,
+					  &cavium_smmu_context_count);
+		smmu->cavium_id_base -= smmu->num_context_banks;
+	}
 
 	/* ID2 */
 	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
@@ -1687,6 +1785,9 @@
 	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
 	smmu->pa_size = size;
 
+	if (id & ID2_VMID16)
+		smmu->features |= ARM_SMMU_FEAT_VMID16;
+
 	/*
 	 * What the page table walker can address actually depends on which
 	 * descriptor format is in use, but since a) we don't know that yet,
@@ -1696,26 +1797,39 @@
 		dev_warn(smmu->dev,
 			 "failed to set DMA mask for table walker\n");
 
-	if (smmu->version == ARM_SMMU_V1) {
+	if (smmu->version < ARM_SMMU_V2) {
 		smmu->va_size = smmu->ipa_size;
-		size = SZ_4K | SZ_2M | SZ_1G;
+		if (smmu->version == ARM_SMMU_V1_64K)
+			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
 	} else {
 		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
 		smmu->va_size = arm_smmu_id_size_to_bits(size);
-#ifndef CONFIG_64BIT
-		smmu->va_size = min(32UL, smmu->va_size);
-#endif
-		size = 0;
 		if (id & ID2_PTFS_4K)
-			size |= SZ_4K | SZ_2M | SZ_1G;
+			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
 		if (id & ID2_PTFS_16K)
-			size |= SZ_16K | SZ_32M;
+			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
 		if (id & ID2_PTFS_64K)
-			size |= SZ_64K | SZ_512M;
+			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
 	}
 
-	arm_smmu_ops.pgsize_bitmap &= size;
-	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", size);
+	/* Now we've corralled the various formats, what'll it do? */
+	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
+		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
+	if (smmu->features &
+	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
+		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
+	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
+		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
+	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
+		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
+
+	if (arm_smmu_ops.pgsize_bitmap == -1UL)
+		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
+	else
+		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
+		   smmu->pgsize_bitmap);
+
 
 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
@@ -1728,12 +1842,27 @@
 	return 0;
 }
 
+struct arm_smmu_match_data {	/* per-compatible data behind of_device_id.data */
+	enum arm_smmu_arch_version version;	/* copied to smmu->version at DT probe */
+	enum arm_smmu_implementation model;	/* copied to smmu->model at DT probe */
+};
+
+#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
+static struct arm_smmu_match_data name = { .version = ver, .model = imp }
+
+ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
+ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
+ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
+ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
+ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
+
 static const struct of_device_id arm_smmu_of_match[] = {
-	{ .compatible = "arm,smmu-v1", .data = (void *)ARM_SMMU_V1 },
-	{ .compatible = "arm,smmu-v2", .data = (void *)ARM_SMMU_V2 },
-	{ .compatible = "arm,mmu-400", .data = (void *)ARM_SMMU_V1 },
-	{ .compatible = "arm,mmu-401", .data = (void *)ARM_SMMU_V1 },
-	{ .compatible = "arm,mmu-500", .data = (void *)ARM_SMMU_V2 },
+	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
+	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
+	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
+	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
+	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
+	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
@@ -1741,6 +1870,7 @@
 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
+	const struct arm_smmu_match_data *data;
 	struct resource *res;
 	struct arm_smmu_device *smmu;
 	struct device *dev = &pdev->dev;
@@ -1756,7 +1886,9 @@
 	smmu->dev = dev;
 
 	of_id = of_match_node(arm_smmu_of_match, dev->of_node);
-	smmu->version = (enum arm_smmu_arch_version)of_id->data;
+	data = of_id->data;
+	smmu->version = data->version;
+	smmu->model = data->model;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	smmu->base = devm_ioremap_resource(dev, res);
@@ -1822,7 +1954,7 @@
 
 	parse_driver_options(smmu);
 
-	if (smmu->version > ARM_SMMU_V1 &&
+	if (smmu->version == ARM_SMMU_V2 &&
 	    smmu->num_context_banks != smmu->num_context_irqs) {
 		dev_err(dev,
 			"found only %d context interrupt(s) but %d required\n",
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 58f2fe6..ea5a9eb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -94,7 +94,7 @@
 		return -ENODEV;
 
 	/* Use the smallest supported page size for IOVA granularity */
-	order = __ffs(domain->ops->pgsize_bitmap);
+	order = __ffs(domain->pgsize_bitmap);
 	base_pfn = max_t(unsigned long, 1, base >> order);
 	end_pfn = (base + size - 1) >> order;
 
@@ -190,11 +190,15 @@
 	kvfree(pages);
 }
 
-static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+static struct page **__iommu_dma_alloc_pages(unsigned int count,
+		unsigned long order_mask, gfp_t gfp)
 {
 	struct page **pages;
 	unsigned int i = 0, array_size = count * sizeof(*pages);
-	unsigned int order = MAX_ORDER;
+
+	order_mask &= (2U << MAX_ORDER) - 1;
+	if (!order_mask)
+		return NULL;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -208,36 +212,38 @@
 
 	while (count) {
 		struct page *page = NULL;
-		int j;
+		unsigned int order_size;
 
 		/*
 		 * Higher-order allocations are a convenience rather
 		 * than a necessity, hence using __GFP_NORETRY until
-		 * falling back to single-page allocations.
+		 * falling back to minimum-order allocations.
 		 */
-		for (order = min_t(unsigned int, order, __fls(count));
-		     order > 0; order--) {
-			page = alloc_pages(gfp | __GFP_NORETRY, order);
+		for (order_mask &= (2U << __fls(count)) - 1;
+		     order_mask; order_mask &= ~order_size) {
+			unsigned int order = __fls(order_mask);
+
+			order_size = 1U << order;
+			page = alloc_pages((order_mask - order_size) ?
+					   gfp | __GFP_NORETRY : gfp, order);
 			if (!page)
 				continue;
-			if (PageCompound(page)) {
-				if (!split_huge_page(page))
-					break;
-				__free_pages(page, order);
-			} else {
+			if (!order)
+				break;
+			if (!PageCompound(page)) {
 				split_page(page, order);
 				break;
+			} else if (!split_huge_page(page)) {
+				break;
 			}
+			__free_pages(page, order);
 		}
-		if (!page)
-			page = alloc_page(gfp);
 		if (!page) {
 			__iommu_dma_free_pages(pages, i);
 			return NULL;
 		}
-		j = 1 << order;
-		count -= j;
-		while (j--)
+		count -= order_size;
+		while (order_size--)
 			pages[i++] = page++;
 	}
 	return pages;
@@ -267,6 +273,7 @@
  *	 attached to an iommu_dma_domain
  * @size: Size of buffer in bytes
  * @gfp: Allocation flags
+ * @attrs: DMA attributes for this allocation
  * @prot: IOMMU mapping flags
  * @handle: Out argument for allocated DMA handle
  * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
@@ -278,8 +285,8 @@
  * Return: Array of struct page pointers describing the buffer,
  *	   or NULL on failure.
  */
-struct page **iommu_dma_alloc(struct device *dev, size_t size,
-		gfp_t gfp, int prot, dma_addr_t *handle,
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+		struct dma_attrs *attrs, int prot, dma_addr_t *handle,
 		void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@@ -288,11 +295,22 @@
 	struct page **pages;
 	struct sg_table sgt;
 	dma_addr_t dma_addr;
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 
 	*handle = DMA_ERROR_CODE;
 
-	pages = __iommu_dma_alloc_pages(count, gfp);
+	min_size = alloc_sizes & -alloc_sizes;
+	if (min_size < PAGE_SIZE) {
+		min_size = PAGE_SIZE;
+		alloc_sizes |= PAGE_SIZE;
+	} else {
+		size = ALIGN(size, min_size);
+	}
+	if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+		alloc_sizes = min_size;
+
+	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp);
 	if (!pages)
 		return NULL;
 
@@ -389,26 +407,58 @@
 
 /*
  * Prepare a successfully-mapped scatterlist to give back to the caller.
- * Handling IOVA concatenation can come later, if needed
+ *
+ * At this point the segments are already laid out by iommu_dma_map_sg() to
+ * avoid individually crossing any boundaries, so we merely need to check a
+ * segment's start address to avoid concatenating across one.
  */
 static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
 		dma_addr_t dma_addr)
 {
-	struct scatterlist *s;
-	int i;
+	struct scatterlist *s, *cur = sg;
+	unsigned long seg_mask = dma_get_seg_boundary(dev);
+	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
+	int i, count = 0;
 
 	for_each_sg(sg, s, nents, i) {
-		/* Un-swizzling the fields here, hence the naming mismatch */
-		unsigned int s_offset = sg_dma_address(s);
+		/* Restore this segment's original unaligned fields first */
+		unsigned int s_iova_off = sg_dma_address(s);
 		unsigned int s_length = sg_dma_len(s);
-		unsigned int s_dma_len = s->length;
+		unsigned int s_iova_len = s->length;
 
-		s->offset += s_offset;
+		s->offset += s_iova_off;
 		s->length = s_length;
-		sg_dma_address(s) = dma_addr + s_offset;
-		dma_addr += s_dma_len;
+		sg_dma_address(s) = DMA_ERROR_CODE;
+		sg_dma_len(s) = 0;
+
+		/*
+		 * Now fill in the real DMA data. If...
+		 * - there is a valid output segment to append to
+		 * - and this segment starts on an IOVA page boundary
+		 * - but doesn't fall at a segment boundary
+		 * - and wouldn't make the resulting output segment too long
+		 */
+		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
+		    (cur_len + s_length <= max_len)) {
+			/* ...then concatenate it with the previous one */
+			cur_len += s_length;
+		} else {
+			/* Otherwise start the next output segment */
+			if (i > 0)
+				cur = sg_next(cur);
+			cur_len = s_length;
+			count++;
+
+			sg_dma_address(cur) = dma_addr + s_iova_off;
+		}
+
+		sg_dma_len(cur) = cur_len;
+		dma_addr += s_iova_len;
+
+		if (s_length + s_iova_off < s_iova_len)
+			cur_len = 0;
 	}
-	return i;
+	return count;
 }
 
 /*
@@ -446,34 +496,40 @@
 	struct scatterlist *s, *prev = NULL;
 	dma_addr_t dma_addr;
 	size_t iova_len = 0;
+	unsigned long mask = dma_get_seg_boundary(dev);
 	int i;
 
 	/*
 	 * Work out how much IOVA space we need, and align the segments to
 	 * IOVA granules for the IOMMU driver to handle. With some clever
 	 * trickery we can modify the list in-place, but reversibly, by
-	 * hiding the original data in the as-yet-unused DMA fields.
+	 * stashing the unaligned parts in the as-yet-unused DMA fields.
 	 */
 	for_each_sg(sg, s, nents, i) {
-		size_t s_offset = iova_offset(iovad, s->offset);
+		size_t s_iova_off = iova_offset(iovad, s->offset);
 		size_t s_length = s->length;
+		size_t pad_len = (mask - iova_len + 1) & mask;
 
-		sg_dma_address(s) = s_offset;
+		sg_dma_address(s) = s_iova_off;
 		sg_dma_len(s) = s_length;
-		s->offset -= s_offset;
-		s_length = iova_align(iovad, s_length + s_offset);
+		s->offset -= s_iova_off;
+		s_length = iova_align(iovad, s_length + s_iova_off);
 		s->length = s_length;
 
 		/*
-		 * The simple way to avoid the rare case of a segment
-		 * crossing the boundary mask is to pad the previous one
-		 * to end at a naturally-aligned IOVA for this one's size,
-		 * at the cost of potentially over-allocating a little.
+		 * Due to the alignment of our single IOVA allocation, we can
+		 * depend on these assumptions about the segment boundary mask:
+		 * - If mask size >= IOVA size, then the IOVA range cannot
+		 *   possibly fall across a boundary, so we don't care.
+		 * - If mask size < IOVA size, then the IOVA range must start
+		 *   exactly on a boundary, therefore we can lay things out
+		 *   based purely on segment lengths without needing to know
+		 *   the actual addresses beforehand.
+		 * - The mask must be a power of 2, so pad_len == 0 if
+		 *   iova_len == 0, thus we cannot dereference prev the first
+		 *   time through here (i.e. before it has a meaningful value).
 		 */
-		if (prev) {
-			size_t pad_len = roundup_pow_of_two(s_length);
-
-			pad_len = (pad_len - iova_len) & (pad_len - 1);
+		if (pad_len && pad_len < s_length - 1) {
 			prev->length += pad_len;
 			iova_len += pad_len;
 		}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 8ffd756..6a86b5d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1579,18 +1579,14 @@
 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
 
 	if (fault_type == INTR_REMAP)
-		pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "
-		       "fault index %llx\n"
-			"INTR-REMAP:[fault reason %02d] %s\n",
-			(source_id >> 8), PCI_SLOT(source_id & 0xFF),
+		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
+			source_id >> 8, PCI_SLOT(source_id & 0xFF),
 			PCI_FUNC(source_id & 0xFF), addr >> 48,
 			fault_reason, reason);
 	else
-		pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
-		       "fault addr %llx \n"
-		       "DMAR:[fault reason %02d] %s\n",
-		       (type ? "DMA Read" : "DMA Write"),
-		       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
+		pr_err("[%s] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02d] %s\n",
+		       type ? "DMA Read" : "DMA Write",
+		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
 		       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
 	return 0;
 }
@@ -1602,10 +1598,17 @@
 	int reg, fault_index;
 	u32 fault_status;
 	unsigned long flag;
+	bool ratelimited;
+	static DEFINE_RATELIMIT_STATE(rs,
+				      DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+
+	/* Disable printing, simply clear the fault when ratelimited */
+	ratelimited = !__ratelimit(&rs);
 
 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
-	if (fault_status)
+	if (fault_status && !ratelimited)
 		pr_err("DRHD: handling fault status reg %x\n", fault_status);
 
 	/* TBD: ignore advanced fault log currently */
@@ -1627,24 +1630,28 @@
 		if (!(data & DMA_FRCD_F))
 			break;
 
-		fault_reason = dma_frcd_fault_reason(data);
-		type = dma_frcd_type(data);
+		if (!ratelimited) {
+			fault_reason = dma_frcd_fault_reason(data);
+			type = dma_frcd_type(data);
 
-		data = readl(iommu->reg + reg +
-				fault_index * PRIMARY_FAULT_REG_LEN + 8);
-		source_id = dma_frcd_source_id(data);
+			data = readl(iommu->reg + reg +
+				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
+			source_id = dma_frcd_source_id(data);
 
-		guest_addr = dmar_readq(iommu->reg + reg +
-				fault_index * PRIMARY_FAULT_REG_LEN);
-		guest_addr = dma_frcd_page_addr(guest_addr);
+			guest_addr = dmar_readq(iommu->reg + reg +
+					fault_index * PRIMARY_FAULT_REG_LEN);
+			guest_addr = dma_frcd_page_addr(guest_addr);
+		}
+
 		/* clear the fault */
 		writel(DMA_FRCD_F, iommu->reg + reg +
 			fault_index * PRIMARY_FAULT_REG_LEN + 12);
 
 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 
-		dmar_fault_do_one(iommu, type, fault_reason,
-				source_id, guest_addr);
+		if (!ratelimited)
+			dmar_fault_do_one(iommu, type, fault_reason,
+					  source_id, guest_addr);
 
 		fault_index++;
 		if (fault_index >= cap_num_fault_regs(iommu->cap))
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index e1852e8..b2bfb95 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1143,7 +1143,7 @@
 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
 }
 
-/* free page table pages. last level pte should already be cleared */
+/* clear last level (leaf) ptes and free page table pages. */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
 				   unsigned long start_pfn,
 				   unsigned long last_pfn)
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 9488e3c..8c61399 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -121,6 +121,8 @@
 #define ARM_V7S_TEX_MASK		0x7
 #define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
 
+#define ARM_V7S_ATTR_MTK_4GB		BIT(9) /* MTK extend it for 4GB mode */
+
 /* *well, except for TEX on level 2 large pages, of course :( */
 #define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
 #define ARM_V7S_CONT_PAGE_TEX_MASK	(ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT)
@@ -258,9 +260,10 @@
 					 struct io_pgtable_cfg *cfg)
 {
 	bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS);
-	arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S |
-			    ARM_V7S_ATTR_TEX(1);
+	arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S;
 
+	if (!(prot & IOMMU_MMIO))
+		pte |= ARM_V7S_ATTR_TEX(1);
 	if (ap) {
 		pte |= ARM_V7S_PTE_AF | ARM_V7S_PTE_AP_UNPRIV;
 		if (!(prot & IOMMU_WRITE))
@@ -270,7 +273,9 @@
 
 	if ((prot & IOMMU_NOEXEC) && ap)
 		pte |= ARM_V7S_ATTR_XN(lvl);
-	if (prot & IOMMU_CACHE)
+	if (prot & IOMMU_MMIO)
+		pte |= ARM_V7S_ATTR_B;
+	else if (prot & IOMMU_CACHE)
 		pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;
 
 	return pte;
@@ -279,10 +284,13 @@
 static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
 {
 	int prot = IOMMU_READ;
+	arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
 
-	if (pte & (ARM_V7S_PTE_AP_RDONLY << ARM_V7S_ATTR_SHIFT(lvl)))
+	if (!(attr & ARM_V7S_PTE_AP_RDONLY))	/* not read-only => writable */
 		prot |= IOMMU_WRITE;
-	if (pte & ARM_V7S_ATTR_C)
+	if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
+		prot |= IOMMU_MMIO;
+	else if (pte & ARM_V7S_ATTR_C)
 		prot |= IOMMU_CACHE;
 
 	return prot;
@@ -364,6 +372,9 @@
 	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
 		pte |= ARM_V7S_ATTR_NS_SECTION;
 
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
+		pte |= ARM_V7S_ATTR_MTK_4GB;
+
 	if (num_entries > 1)
 		pte = arm_v7s_pte_to_cont(pte, lvl);
 
@@ -625,9 +636,15 @@
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_NO_PERMS |
-			    IO_PGTABLE_QUIRK_TLBI_ON_MAP))
+			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
+			    IO_PGTABLE_QUIRK_ARM_MTK_4GB))
 		return NULL;
 
+	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB &&
+	    !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
+			return NULL;
+
 	data = kmalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return NULL;
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index f433b51..a1ed1b7 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -355,7 +355,10 @@
 		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
 			pte |= ARM_LPAE_PTE_AP_RDONLY;
 
-		if (prot & IOMMU_CACHE)
+		if (prot & IOMMU_MMIO)
+			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
+				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
+		else if (prot & IOMMU_CACHE)
 			pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
 				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
 	} else {
@@ -364,7 +367,9 @@
 			pte |= ARM_LPAE_PTE_HAP_READ;
 		if (prot & IOMMU_WRITE)
 			pte |= ARM_LPAE_PTE_HAP_WRITE;
-		if (prot & IOMMU_CACHE)
+		if (prot & IOMMU_MMIO)
+			pte |= ARM_LPAE_PTE_MEMATTR_DEV;
+		else if (prot & IOMMU_CACHE)
 			pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
 		else
 			pte |= ARM_LPAE_PTE_MEMATTR_NC;
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 876f6a7..127558d 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -25,8 +25,7 @@
 #include "io-pgtable.h"
 
 static const struct io_pgtable_init_fns *
-io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] =
-{
+io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE
 	[ARM_32_LPAE_S1] = &io_pgtable_arm_32_lpae_s1_init_fns,
 	[ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns,
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index d4f5027..969d82c 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -60,10 +60,16 @@
 	 * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid
 	 *	(unmapped) entries but the hardware might do so anyway, perform
 	 *	TLB maintenance when mapping as well as when unmapping.
+	 *
+	 * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all
+	 *	PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
+	 *	when the SoC is in "4GB mode" and they can only access the high
+	 *	remap of DRAM (0x1_00000000 to 0x1_ffffffff).
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
 	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
+	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b9df141..3000051 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -337,9 +337,9 @@
 	if (!domain || domain->type != IOMMU_DOMAIN_DMA)
 		return 0;
 
-	BUG_ON(!domain->ops->pgsize_bitmap);
+	BUG_ON(!domain->pgsize_bitmap);
 
-	pg_size = 1UL << __ffs(domain->ops->pgsize_bitmap);
+	pg_size = 1UL << __ffs(domain->pgsize_bitmap);
 	INIT_LIST_HEAD(&mappings);
 
 	iommu_get_dm_regions(dev, &mappings);
@@ -660,8 +660,8 @@
 }
 
 /*
- * Look for aliases to or from the given device for exisiting groups.  The
- * dma_alias_devfn only supports aliases on the same bus, therefore the search
+ * Look for aliases to or from the given device for existing groups. DMA
+ * aliases are only supported on the same bus, therefore the search
  * space is quite small (especially since we're really only looking at pcie
  * device, and therefore only expect multiple slots on the root complex or
  * downstream switch ports).  It's conceivable though that a pair of
@@ -686,11 +686,7 @@
 			continue;
 
 		/* We alias them or they alias us */
-		if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
-		     pdev->dma_alias_devfn == tmp->devfn) ||
-		    ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
-		     tmp->dma_alias_devfn == pdev->devfn)) {
-
+		if (pci_devs_are_dma_aliases(pdev, tmp)) {
 			group = get_pci_alias_group(tmp, devfns);
 			if (group) {
 				pci_dev_put(tmp);
@@ -1073,6 +1069,8 @@
 
 	domain->ops  = bus->iommu_ops;
 	domain->type = type;
+	/* Assume all sizes by default; the driver may override this later */
+	domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
 
 	return domain;
 }
@@ -1297,7 +1295,7 @@
 	pgsize = (1UL << (pgsize_idx + 1)) - 1;
 
 	/* throw away page sizes not supported by the hardware */
-	pgsize &= domain->ops->pgsize_bitmap;
+	pgsize &= domain->pgsize_bitmap;
 
 	/* make sure we're still sane */
 	BUG_ON(!pgsize);
@@ -1319,14 +1317,14 @@
 	int ret = 0;
 
 	if (unlikely(domain->ops->map == NULL ||
-		     domain->ops->pgsize_bitmap == 0UL))
+		     domain->pgsize_bitmap == 0UL))
 		return -ENODEV;
 
 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
 		return -EINVAL;
 
 	/* find out the minimum page size supported */
-	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
 
 	/*
 	 * both the virtual address and the physical one, as well as
@@ -1373,14 +1371,14 @@
 	unsigned long orig_iova = iova;
 
 	if (unlikely(domain->ops->unmap == NULL ||
-		     domain->ops->pgsize_bitmap == 0UL))
+		     domain->pgsize_bitmap == 0UL))
 		return -ENODEV;
 
 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
 		return -EINVAL;
 
 	/* find out the minimum page size supported */
-	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
 
 	/*
 	 * The virtual address, as well as the size of the mapping, must be
@@ -1426,10 +1424,10 @@
 	unsigned int i, min_pagesz;
 	int ret;
 
-	if (unlikely(domain->ops->pgsize_bitmap == 0UL))
+	if (unlikely(domain->pgsize_bitmap == 0UL))
 		return 0;
 
-	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
 
 	for_each_sg(sg, s, nents, i) {
 		phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
@@ -1510,7 +1508,7 @@
 		break;
 	case DOMAIN_ATTR_PAGING:
 		paging  = data;
-		*paging = (domain->ops->pgsize_bitmap != 0UL);
+		*paging = (domain->pgsize_bitmap != 0UL);
 		break;
 	case DOMAIN_ATTR_WINDOWS:
 		count = data;
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 929a66a..c3043d8 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -11,6 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
+#include <linux/bootmem.h>
 #include <linux/bug.h>
 #include <linux/clk.h>
 #include <linux/component.h>
@@ -56,7 +57,7 @@
 #define F_MMU_TF_PROTECT_SEL(prot)		(((prot) & 0x3) << 5)
 
 #define REG_MMU_IVRP_PADDR			0x114
-#define F_MMU_IVRP_PA_SET(pa)			((pa) >> 1)
+#define F_MMU_IVRP_PA_SET(pa, ext)		(((pa) >> 1) | ((!!(ext)) << 31))
 
 #define REG_MMU_INT_CONTROL0			0x120
 #define F_L2_MULIT_HIT_EN			BIT(0)
@@ -125,6 +126,7 @@
 	struct mtk_iommu_domain		*m4u_dom;
 	struct iommu_group		*m4u_group;
 	struct mtk_smi_iommu		smi_imu;      /* SMI larb iommu info */
+	bool                            enable_4GB;
 };
 
 static struct iommu_ops mtk_iommu_ops;
@@ -257,6 +259,9 @@
 		.iommu_dev = data->dev,
 	};
 
+	if (data->enable_4GB)
+		dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_4GB;
+
 	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
 	if (!dom->iop) {
 		dev_err(data->dev, "Failed to alloc io pgtable\n");
@@ -264,7 +269,7 @@
 	}
 
 	/* Update our support page sizes bitmap */
-	mtk_iommu_ops.pgsize_bitmap = dom->cfg.pgsize_bitmap;
+	dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap;
 
 	writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
 	       data->base + REG_MMU_PT_BASE_ADDR);
@@ -530,7 +535,7 @@
 		F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
 	writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
 
-	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base),
+	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
 		       data->base + REG_MMU_IVRP_PADDR);
 
 	writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
@@ -591,6 +596,9 @@
 		return -ENOMEM;
 	data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
 
+	/* Whether the current dram is over 4GB */
+	data->enable_4GB = !!(max_pfn > (0xffffffffUL >> PAGE_SHIFT));
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	data->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(data->base))
@@ -690,7 +698,7 @@
 	writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
 	writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
 	writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
-	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base),
+	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
 		       base + REG_MMU_IVRP_PADDR);
 	return 0;
 }
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 5fea665..af499ae 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -98,12 +98,12 @@
 struct of_iommu_node {
 	struct list_head list;
 	struct device_node *np;
-	struct iommu_ops *ops;
+	const struct iommu_ops *ops;
 };
 static LIST_HEAD(of_iommu_list);
 static DEFINE_SPINLOCK(of_iommu_lock);
 
-void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops)
+void of_iommu_set_ops(struct device_node *np, const struct iommu_ops *ops)
 {
 	struct of_iommu_node *iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 
@@ -119,10 +119,10 @@
 	spin_unlock(&of_iommu_lock);
 }
 
-struct iommu_ops *of_iommu_get_ops(struct device_node *np)
+const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
 {
 	struct of_iommu_node *node;
-	struct iommu_ops *ops = NULL;
+	const struct iommu_ops *ops = NULL;
 
 	spin_lock(&of_iommu_lock);
 	list_for_each_entry(node, &of_iommu_list, list)
@@ -134,12 +134,12 @@
 	return ops;
 }
 
-struct iommu_ops *of_iommu_configure(struct device *dev,
-				     struct device_node *master_np)
+const struct iommu_ops *of_iommu_configure(struct device *dev,
+					   struct device_node *master_np)
 {
 	struct of_phandle_args iommu_spec;
 	struct device_node *np;
-	struct iommu_ops *ops = NULL;
+	const struct iommu_ops *ops = NULL;
 	int idx = 0;
 
 	/*
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 9bc20e2..505548a 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -136,7 +136,7 @@
 			     struct seq_file *s)
 {
 	seq_printf(s, "%08x %08x %01x\n", cr->cam, cr->ram,
-			  (cr->cam & MMU_CAM_P) ? 1 : 0);
+		   (cr->cam & MMU_CAM_P) ? 1 : 0);
 	return 0;
 }
 
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 3dc5b65..e2583cc 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -628,10 +628,12 @@
 		break;
 	default:
 		fn = NULL;
-		BUG();
 		break;
 	}
 
+	if (WARN_ON(!fn))
+		return -EINVAL;
+
 	prot = get_iopte_attr(e);
 
 	spin_lock(&obj->page_table_lock);
@@ -987,7 +989,6 @@
 {
 	struct omap_iommu *obj = platform_get_drvdata(pdev);
 
-	iopgtable_clear_entry_all(obj);
 	omap_iommu_debugfs_remove(obj);
 
 	pm_runtime_disable(obj->dev);
@@ -1161,7 +1162,8 @@
 	 * should never fail, but please keep this around to ensure
 	 * we keep the hardware happy
 	 */
-	BUG_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE));
+	if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE)))
+		goto fail_align;
 
 	clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE);
 	spin_lock_init(&omap_domain->lock);
@@ -1172,6 +1174,8 @@
 
 	return &omap_domain->domain;
 
+fail_align:
+	kfree(omap_domain->pgtable);
 fail_nomem:
 	kfree(omap_domain);
 out:
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 5710a06..c7d6156 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1049,6 +1049,8 @@
 
 	for (i = 0; i < pdev->num_resources; i++) {
 		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+		if (!res)
+			continue;
 		iommu->bases[i] = devm_ioremap_resource(&pdev->dev, res);
 		if (IS_ERR(iommu->bases[i]))
 			continue;
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 81f88ad..46f10ec 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -253,3 +253,9 @@
 
 config PARTITION_PERCPU
 	bool
+
+config EZNPS_GIC
+	bool "NPS400 Global Interrupt Manager (GIM)"
+	select IRQ_DOMAIN
+	help
+	  Support the EZchip NPS400 global interrupt controller
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index f828244..38853a1 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -68,3 +68,4 @@
 obj-$(CONFIG_PIC32_EVIC)		+= irq-pic32-evic.o
 obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
 obj-$(CONFIG_LS_SCFG_MSI)		+= irq-ls-scfg-msi.o
+obj-$(CONFIG_EZNPS_GIC)			+= irq-eznps.o
diff --git a/drivers/irqchip/irq-eznps.c b/drivers/irqchip/irq-eznps.c
new file mode 100644
index 0000000..efbf0e4
--- /dev/null
+++ b/drivers/irqchip/irq-eznps.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include <soc/nps/common.h>
+
+#define NPS_NR_CPU_IRQS 8  /* number of interrupt lines of NPS400 CPU */
+#define NPS_TIMER0_IRQ  3
+
+/*
+ * The NPS400 core includes Interrupt Controller (IC) support.
+ * All cores can deactivate level irqs at the first level of control,
+ * in the cores' mesh layer called MTM.
+ * For devices outside the chip, e.g. uart, network, there is another
+ * level called the Global Interrupt Manager (GIM).
+ * This second level can control level and edge interrupts.
+ *
+ * NOTE: AUX_IENABLE and CTOP_AUX_IACK are auxiliary registers
+ * with a private HW copy per CPU.
+ */
+
+/* Mask a core irq line by clearing its bit in AUX_IENABLE. */
+static void nps400_irq_mask(struct irq_data *irqd)
+{
+	unsigned int hwirq = irqd_to_hwirq(irqd);
+	unsigned int enabled = read_aux_reg(AUX_IENABLE);
+
+	enabled &= ~(1 << hwirq);
+	write_aux_reg(AUX_IENABLE, enabled);
+}
+
+/* Unmask a core irq line by setting its bit in AUX_IENABLE. */
+static void nps400_irq_unmask(struct irq_data *irqd)
+{
+	unsigned int hwirq = irqd_to_hwirq(irqd);
+	unsigned int enabled = read_aux_reg(AUX_IENABLE);
+
+	enabled |= (1 << hwirq);
+	write_aux_reg(AUX_IENABLE, enabled);
+}
+
+/* EOI for the "NPS400 IC Global" chip: ack the core IC, then the GIC. */
+static void nps400_irq_eoi_global(struct irq_data *irqd)
+{
+	unsigned int __maybe_unused hwirq = irqd_to_hwirq(irqd);
+
+	write_aux_reg(CTOP_AUX_IACK, 1 << hwirq);
+
+	/* All device access attempts must be done before the GIC is acked */
+	mb();
+	nps_ack_gic();
+}
+
+static void nps400_irq_eoi(struct irq_data *irqd)
+{
+	unsigned int __maybe_unused hwirq = irqd_to_hwirq(irqd);
+
+	write_aux_reg(CTOP_AUX_IACK, 1 << hwirq);	/* core-level ack only */
+}
+
+static struct irq_chip nps400_irq_chip_fasteoi = {
+	.name		= "NPS400 IC Global",
+	.irq_mask	= nps400_irq_mask,
+	.irq_unmask	= nps400_irq_unmask,
+	.irq_eoi	= nps400_irq_eoi_global,	/* also calls nps_ack_gic() */
+};
+
+static struct irq_chip nps400_irq_chip_percpu = {
+	.name		= "NPS400 IC",
+	.irq_mask	= nps400_irq_mask,
+	.irq_unmask	= nps400_irq_unmask,
+	.irq_eoi	= nps400_irq_eoi,	/* writes CTOP_AUX_IACK only */
+};
+
+/* Domain .map: timer0 (and the IPI on SMP) are per-cpu, the rest fasteoi. */
+static int nps400_irq_map(struct irq_domain *d, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	bool percpu = (hw == NPS_TIMER0_IRQ);
+
+#ifdef CONFIG_SMP
+	percpu = percpu || (hw == NPS_IPI_IRQ);
+#endif
+	if (!percpu) {
+		irq_set_chip_and_handler(virq, &nps400_irq_chip_fasteoi,
+					 handle_fasteoi_irq);
+		return 0;
+	}
+
+	irq_set_percpu_devid(virq);
+	irq_set_chip_and_handler(virq, &nps400_irq_chip_percpu,
+				 handle_percpu_devid_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops nps400_irq_ops = {
+	.map	= nps400_irq_map,
+	.xlate	= irq_domain_xlate_onecell,
+};
+
+/* OF init hook: create the root linear domain and make it the default. */
+static int __init nps400_of_init(struct device_node *node,
+				 struct device_node *parent)
+{
+	static struct irq_domain *root;
+
+	/* The in-core IC is only accepted as a root controller */
+	if (parent) {
+		pr_err("DeviceTree incore ic not a root irq controller\n");
+		return -EINVAL;
+	}
+
+	root = irq_domain_add_linear(node, NPS_NR_CPU_IRQS,
+				     &nps400_irq_ops, NULL);
+	if (!root) {
+		pr_err("nps400 root irq domain not avail\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * A primary irqchip never has a parent, so install this domain
+	 * as the default host to let primary domain lookups succeed.
+	 */
+	irq_set_default_host(root);
+
+#ifdef CONFIG_SMP
+	irq_create_mapping(root, NPS_IPI_IRQ);
+#endif
+	return 0;
+}
+IRQCHIP_DECLARE(ezchip_nps400_ic, "ezchip,nps400-ic", nps400_of_init);
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 97c0028..89e7423 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -21,6 +21,19 @@
 
 #include "irq-gic-common.h"
 
+static const struct gic_kvm_info *gic_kvm_info;
+
+const struct gic_kvm_info *gic_get_kvm_info(void)
+{
+	return gic_kvm_info;	/* NULL until gic_set_kvm_info() has run */
+}
+
+void gic_set_kvm_info(const struct gic_kvm_info *info)
+{
+	BUG_ON(gic_kvm_info != NULL);	/* only one registration is allowed */
+	gic_kvm_info = info;
+}
+
 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 		void *data)
 {
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index fff697d..205e5fd 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -19,6 +19,7 @@
 
 #include <linux/of.h>
 #include <linux/irqdomain.h>
+#include <linux/irqchip/arm-gic-common.h>
 
 struct gic_quirk {
 	const char *desc;
@@ -35,4 +36,6 @@
 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 		void *data);
 
+void gic_set_kvm_info(const struct gic_kvm_info *info);
+
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 1a1ea4f..fb042ba 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -15,6 +15,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#define pr_fmt(fmt)	"GICv3: " fmt
+
 #include <linux/acpi.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
@@ -28,6 +30,7 @@
 #include <linux/slab.h>
 
 #include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-common.h>
 #include <linux/irqchip/arm-gic-v3.h>
 #include <linux/irqchip/irq-partition-percpu.h>
 
@@ -59,6 +62,8 @@
 static struct gic_chip_data gic_data __read_mostly;
 static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
 
+static struct gic_kvm_info gic_v3_kvm_info;
+
 #define gic_data_rdist()		(this_cpu_ptr(gic_data.rdists.rdist))
 #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
 #define gic_data_rdist_sgi_base()	(gic_data_rdist_rd_base() + SZ_64K)
@@ -1002,7 +1007,7 @@
 }
 
 /* Create all possible partitions at boot time */
-static void gic_populate_ppi_partitions(struct device_node *gic_node)
+static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
 {
 	struct device_node *parts_node, *child_part;
 	int part_idx = 0, i;
@@ -1089,6 +1094,30 @@
 	}
 }
 
+/* Collect the maintenance irq and GICV region from DT and publish them. */
+static void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+	struct resource gicv;
+	u32 nr_redist, idx;
+
+	gic_v3_kvm_info.type = GIC_V3;
+
+	/* Without a maintenance interrupt nothing is registered */
+	gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+	if (!gic_v3_kvm_info.maint_irq)
+		return;
+
+	/* If the property cannot be read, assume a single region */
+	if (of_property_read_u32(node, "#redistributor-regions", &nr_redist))
+		nr_redist = 1;
+
+	idx = nr_redist + 3;	/* Also skip GICD, GICC, GICH */
+	if (!of_address_to_resource(node, idx, &gicv))
+		gic_v3_kvm_info.vcpu = gicv;
+
+	gic_set_kvm_info(&gic_v3_kvm_info);
+}
+
 static int __init gic_of_init(struct device_node *node, struct device_node *parent)
 {
 	void __iomem *dist_base;
@@ -1144,6 +1173,7 @@
 		goto out_unmap_rdist;
 
 	gic_populate_ppi_partitions(node);
+	gic_of_setup_kvm_info(node);
 	return 0;
 
 out_unmap_rdist:
@@ -1159,19 +1189,25 @@
 IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init);
 
 #ifdef CONFIG_ACPI
-static void __iomem *dist_base;
-static struct redist_region *redist_regs __initdata;
-static u32 nr_redist_regions __initdata;
-static bool single_redist;
+static struct
+{
+	void __iomem *dist_base;		/* ioremap()ed GICD registers */
+	struct redist_region *redist_regs;	/* nr_redist_regions entries */
+	u32 nr_redist_regions;
+	bool single_redist;	/* true when regions come from GICC entries */
+	u32 maint_irq;		/* vgic_interrupt of the first enabled GICC */
+	int maint_irq_mode;	/* ACPI_EDGE_SENSITIVE or ACPI_LEVEL_SENSITIVE */
+	phys_addr_t vcpu_base;	/* gicv_base_address of the first enabled GICC */
+} acpi_data __initdata;
 
 static void __init
 gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
 {
 	static int count = 0;
 
-	redist_regs[count].phys_base = phys_base;
-	redist_regs[count].redist_base = redist_base;
-	redist_regs[count].single_redist = single_redist;
+	acpi_data.redist_regs[count].phys_base = phys_base;
+	acpi_data.redist_regs[count].redist_base = redist_base;
+	acpi_data.redist_regs[count].single_redist = acpi_data.single_redist;
 	count++;
 }
 
@@ -1199,7 +1235,7 @@
 {
 	struct acpi_madt_generic_interrupt *gicc =
 				(struct acpi_madt_generic_interrupt *)header;
-	u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
+	u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
 	u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
 	void __iomem *redist_base;
 
@@ -1216,7 +1252,7 @@
 	acpi_tbl_entry_handler redist_parser;
 	enum acpi_madt_type type;
 
-	if (single_redist) {
+	if (acpi_data.single_redist) {
 		type = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
 		redist_parser = gic_acpi_parse_madt_gicc;
 	} else {
@@ -1267,14 +1303,14 @@
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR,
 				      gic_acpi_match_gicr, 0);
 	if (count > 0) {
-		single_redist = false;
+		acpi_data.single_redist = false;
 		return count;
 	}
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
 				      gic_acpi_match_gicc, 0);
 	if (count > 0)
-		single_redist = true;
+		acpi_data.single_redist = true;
 
 	return count;
 }
@@ -1294,36 +1330,117 @@
 	if (count <= 0)
 		return false;
 
-	nr_redist_regions = count;
+	acpi_data.nr_redist_regions = count;
 	return true;
 }
 
+/* MADT GICC walker: record the first enabled entry, verify the others. */
+static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header,
+						const unsigned long end)
+{
+	static int seen_first;
+	struct acpi_madt_generic_interrupt *entry =
+		(struct acpi_madt_generic_interrupt *)header;
+	int mode = ACPI_LEVEL_SENSITIVE;
+
+	/* Skip unusable CPUs */
+	if (!(entry->flags & ACPI_MADT_ENABLED))
+		return 0;
+
+	if (entry->flags & ACPI_MADT_VGIC_IRQ_MODE)
+		mode = ACPI_EDGE_SENSITIVE;
+
+	if (seen_first) {
+		/*
+		 * The maintenance interrupt and GICV should be the same
+		 * for every CPU.
+		 */
+		if (acpi_data.maint_irq != entry->vgic_interrupt ||
+		    acpi_data.maint_irq_mode != mode ||
+		    acpi_data.vcpu_base != entry->gicv_base_address)
+			return -EINVAL;
+		return 0;
+	}
+
+	seen_first = true;
+	acpi_data.maint_irq = entry->vgic_interrupt;
+	acpi_data.maint_irq_mode = mode;
+	acpi_data.vcpu_base = entry->gicv_base_address;
+
+	return 0;
+}
+
+/* Walk the MADT GICC entries; true when the walk returns a count > 0. */
+static bool __init gic_acpi_collect_virt_info(void)
+{
+	int nr_gicc;
+
+	nr_gicc = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+					gic_acpi_parse_virt_madt_gicc, 0);
+	return nr_gicc > 0;
+}
+
 #define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K)
+#define ACPI_GICV2_VCTRL_MEM_SIZE	(SZ_4K)
+#define ACPI_GICV2_VCPU_MEM_SIZE	(SZ_8K)
+
+/* Register the MADT-described maintenance irq and GICV range for KVM. */
+static void __init gic_acpi_setup_kvm_info(void)
+{
+	struct resource *gicv = &gic_v3_kvm_info.vcpu;
+	int virq;
+
+	if (!gic_acpi_collect_virt_info()) {
+		pr_warn("Unable to get hardware information used for virtualization\n");
+		return;
+	}
+
+	gic_v3_kvm_info.type = GIC_V3;
+
+	/* Nothing is published unless the maintenance GSI can be mapped */
+	virq = acpi_register_gsi(NULL, acpi_data.maint_irq,
+				 acpi_data.maint_irq_mode, ACPI_ACTIVE_HIGH);
+	if (virq <= 0)
+		return;
+	gic_v3_kvm_info.maint_irq = virq;
+
+	/* The GICV range is optional: a zero base leaves vcpu untouched */
+	if (acpi_data.vcpu_base) {
+		gicv->flags = IORESOURCE_MEM;
+		gicv->start = acpi_data.vcpu_base;
+		gicv->end = gicv->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
+	}
+
+	gic_set_kvm_info(&gic_v3_kvm_info);
+}
 
 static int __init
 gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
 {
 	struct acpi_madt_generic_distributor *dist;
 	struct fwnode_handle *domain_handle;
+	size_t size;
 	int i, err;
 
 	/* Get distributor base address */
 	dist = (struct acpi_madt_generic_distributor *)header;
-	dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE);
-	if (!dist_base) {
+	acpi_data.dist_base = ioremap(dist->base_address,
+				      ACPI_GICV3_DIST_MEM_SIZE);
+	if (!acpi_data.dist_base) {
 		pr_err("Unable to map GICD registers\n");
 		return -ENOMEM;
 	}
 
-	err = gic_validate_dist_version(dist_base);
+	err = gic_validate_dist_version(acpi_data.dist_base);
 	if (err) {
-		pr_err("No distributor detected at @%p, giving up", dist_base);
+		pr_err("No distributor detected at @%p, giving up",
+		       acpi_data.dist_base);
 		goto out_dist_unmap;
 	}
 
-	redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions,
-			      GFP_KERNEL);
-	if (!redist_regs) {
+	size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions;
+	acpi_data.redist_regs = kzalloc(size, GFP_KERNEL);
+	if (!acpi_data.redist_regs) {
 		err = -ENOMEM;
 		goto out_dist_unmap;
 	}
@@ -1332,29 +1449,31 @@
 	if (err)
 		goto out_redist_unmap;
 
-	domain_handle = irq_domain_alloc_fwnode(dist_base);
+	domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base);
 	if (!domain_handle) {
 		err = -ENOMEM;
 		goto out_redist_unmap;
 	}
 
-	err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0,
-			     domain_handle);
+	err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs,
+			     acpi_data.nr_redist_regions, 0, domain_handle);
 	if (err)
 		goto out_fwhandle_free;
 
 	acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+	gic_acpi_setup_kvm_info();
+
 	return 0;
 
 out_fwhandle_free:
 	irq_domain_free_fwnode(domain_handle);
 out_redist_unmap:
-	for (i = 0; i < nr_redist_regions; i++)
-		if (redist_regs[i].redist_base)
-			iounmap(redist_regs[i].redist_base);
-	kfree(redist_regs);
+	for (i = 0; i < acpi_data.nr_redist_regions; i++)
+		if (acpi_data.redist_regs[i].redist_base)
+			iounmap(acpi_data.redist_regs[i].redist_base);
+	kfree(acpi_data.redist_regs);
 out_dist_unmap:
-	iounmap(dist_base);
+	iounmap(acpi_data.dist_base);
 	return err;
 }
 IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 1de20e1..b4e6471 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -105,6 +105,8 @@
 
 static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
 
+static struct gic_kvm_info gic_v2_kvm_info;
+
 #ifdef CONFIG_GIC_NON_BANKED
 static void __iomem *gic_get_percpu_base(union gic_base *base)
 {
@@ -1248,7 +1250,7 @@
 	return true;
 }
 
-static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
+static int __init gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
 {
 	if (!gic || !node)
 		return -EINVAL;
@@ -1272,6 +1274,29 @@
 	return -ENOMEM;
 }
 
+static void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+	int ret;
+	struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
+	struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
+
+	gic_v2_kvm_info.type = GIC_V2;
+
+	gic_v2_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+	if (!gic_v2_kvm_info.maint_irq)
+		return;
+
+	ret = of_address_to_resource(node, 2, vctrl_res);
+	if (ret)
+		return;
+
+	ret = of_address_to_resource(node, 3, vcpu_res);
+	if (ret)
+		return;
+
+	gic_set_kvm_info(&gic_v2_kvm_info);
+}
+
 int __init
 gic_of_init(struct device_node *node, struct device_node *parent)
 {
@@ -1303,8 +1328,10 @@
 		return ret;
 	}
 
-	if (!gic_cnt)
+	if (!gic_cnt) {
 		gic_init_physaddr(node);
+		gic_of_setup_kvm_info(node);
+	}
 
 	if (parent) {
 		irq = irq_of_parse_and_map(node, 0);
@@ -1330,7 +1357,14 @@
 #endif
 
 #ifdef CONFIG_ACPI
-static phys_addr_t cpu_phy_base __initdata;
+static struct
+{
+	phys_addr_t cpu_phys_base;
+	u32 maint_irq;
+	int maint_irq_mode;
+	phys_addr_t vctrl_base;
+	phys_addr_t vcpu_base;
+} acpi_data __initdata;
 
 static int __init
 gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
@@ -1350,10 +1384,16 @@
 	 * All CPU interface addresses have to be the same.
 	 */
 	gic_cpu_base = processor->base_address;
-	if (cpu_base_assigned && gic_cpu_base != cpu_phy_base)
+	if (cpu_base_assigned && gic_cpu_base != acpi_data.cpu_phys_base)
 		return -EINVAL;
 
-	cpu_phy_base = gic_cpu_base;
+	acpi_data.cpu_phys_base = gic_cpu_base;
+	acpi_data.maint_irq = processor->vgic_interrupt;
+	acpi_data.maint_irq_mode = (processor->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+				    ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+	acpi_data.vctrl_base = processor->gich_base_address;
+	acpi_data.vcpu_base = processor->gicv_base_address;
+
 	cpu_base_assigned = 1;
 	return 0;
 }
@@ -1384,6 +1424,41 @@
 
 #define ACPI_GICV2_DIST_MEM_SIZE	(SZ_4K)
 #define ACPI_GIC_CPU_IF_MEM_SIZE	(SZ_8K)
+#define ACPI_GICV2_VCTRL_MEM_SIZE	(SZ_4K)
+#define ACPI_GICV2_VCPU_MEM_SIZE	(SZ_8K)
+
+static void __init gic_acpi_setup_kvm_info(void)
+{
+	int irq;
+	struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
+	struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
+
+	gic_v2_kvm_info.type = GIC_V2;
+
+	if (!acpi_data.vctrl_base)
+		return;
+
+	vctrl_res->flags = IORESOURCE_MEM;
+	vctrl_res->start = acpi_data.vctrl_base;
+	vctrl_res->end = vctrl_res->start + ACPI_GICV2_VCTRL_MEM_SIZE - 1;
+
+	if (!acpi_data.vcpu_base)
+		return;
+
+	vcpu_res->flags = IORESOURCE_MEM;
+	vcpu_res->start = acpi_data.vcpu_base;
+	vcpu_res->end = vcpu_res->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
+
+	irq = acpi_register_gsi(NULL, acpi_data.maint_irq,
+				acpi_data.maint_irq_mode,
+				ACPI_ACTIVE_HIGH);
+	if (irq <= 0)
+		return;
+
+	gic_v2_kvm_info.maint_irq = irq;
+
+	gic_set_kvm_info(&gic_v2_kvm_info);
+}
 
 static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
 				   const unsigned long end)
@@ -1401,7 +1476,7 @@
 		return -EINVAL;
 	}
 
-	gic->raw_cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE);
+	gic->raw_cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE);
 	if (!gic->raw_cpu_base) {
 		pr_err("Unable to map GICC registers\n");
 		return -ENOMEM;
@@ -1447,6 +1522,8 @@
 	if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
 		gicv2m_init(NULL, gic_data[0].domain);
 
+	gic_acpi_setup_kvm_info();
+
 	return 0;
 }
 IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 4dffccf..c089f49 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -197,7 +197,7 @@
 
 	local_irq_save(flags);
 
-	gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), cpu);
+	gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), mips_cm_vp_id(cpu));
 
 	if (mips_cm_is64) {
 		gic_write(GIC_REG(VPE_OTHER, GIC_VPE_COMPARE), cnt);
@@ -246,6 +246,14 @@
 
 #endif
 
+unsigned gic_read_local_vp_id(void)
+{
+	unsigned long ident;
+
+	ident = gic_read(GIC_REG(VPE_LOCAL, GIC_VP_IDENT));
+	return ident & GIC_VP_IDENT_VCNUM_MSK;
+}
+
 static bool gic_local_irq_is_routable(int intr)
 {
 	u32 vpe_ctl;
@@ -553,7 +561,8 @@
 
 	spin_lock_irqsave(&gic_lock, flags);
 	for (i = 0; i < gic_vpes; i++) {
-		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), i);
+		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR),
+			  mips_cm_vp_id(i));
 		gic_write32(GIC_REG(VPE_OTHER, GIC_VPE_RMASK), 1 << intr);
 	}
 	spin_unlock_irqrestore(&gic_lock, flags);
@@ -567,7 +576,8 @@
 
 	spin_lock_irqsave(&gic_lock, flags);
 	for (i = 0; i < gic_vpes; i++) {
-		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), i);
+		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR),
+			  mips_cm_vp_id(i));
 		gic_write32(GIC_REG(VPE_OTHER, GIC_VPE_SMASK), 1 << intr);
 	}
 	spin_unlock_irqrestore(&gic_lock, flags);
@@ -607,7 +617,8 @@
 	for (i = 0; i < gic_vpes; i++) {
 		unsigned int j;
 
-		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), i);
+		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR),
+			  mips_cm_vp_id(i));
 		for (j = 0; j < GIC_NUM_LOCAL_INTRS; j++) {
 			if (!gic_local_irq_is_routable(j))
 				continue;
@@ -652,7 +663,8 @@
 	for (i = 0; i < gic_vpes; i++) {
 		u32 val = GIC_MAP_TO_PIN_MSK | gic_cpu_pin;
 
-		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), i);
+		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR),
+			  mips_cm_vp_id(i));
 
 		switch (intr) {
 		case GIC_LOCAL_INT_WD:
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 3fe86b5..d8129ec 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -46,7 +46,7 @@
  * allocated while we're using it
  */
 static int bitmap_checkpage(struct bitmap_counts *bitmap,
-			    unsigned long page, int create)
+			    unsigned long page, int create, int no_hijack)
 __releases(bitmap->lock)
 __acquires(bitmap->lock)
 {
@@ -90,6 +90,9 @@
 
 	if (mappage == NULL) {
 		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
+		/* We don't support hijack for cluster raid */
+		if (no_hijack)
+			return -ENOMEM;
 		/* failed - set the hijacked flag so that we can use the
 		 * pointer as a counter */
 		if (!bitmap->bp[page].map)
@@ -756,7 +759,7 @@
 		bytes += sizeof(bitmap_super_t);
 
 	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
-	offset = slot_number * (num_pages - 1);
+	offset = slot_number * num_pages;
 
 	store->filemap = kmalloc(sizeof(struct page *)
 				 * num_pages, GFP_KERNEL);
@@ -900,6 +903,11 @@
 	struct page *page;
 	void *kaddr;
 	unsigned long chunk = block >> bitmap->counts.chunkshift;
+	struct bitmap_storage *store = &bitmap->storage;
+	unsigned long node_offset = 0;
+
+	if (mddev_is_clustered(bitmap->mddev))
+		node_offset = bitmap->cluster_slot * store->file_pages;
 
 	page = filemap_get_page(&bitmap->storage, chunk);
 	if (!page)
@@ -915,7 +923,7 @@
 	kunmap_atomic(kaddr);
 	pr_debug("set file bit %lu page %lu\n", bit, page->index);
 	/* record page number so it gets flushed to disk when unplug occurs */
-	set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
+	set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
 }
 
 static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
@@ -924,6 +932,11 @@
 	struct page *page;
 	void *paddr;
 	unsigned long chunk = block >> bitmap->counts.chunkshift;
+	struct bitmap_storage *store = &bitmap->storage;
+	unsigned long node_offset = 0;
+
+	if (mddev_is_clustered(bitmap->mddev))
+		node_offset = bitmap->cluster_slot * store->file_pages;
 
 	page = filemap_get_page(&bitmap->storage, chunk);
 	if (!page)
@@ -935,8 +948,8 @@
 	else
 		clear_bit_le(bit, paddr);
 	kunmap_atomic(paddr);
-	if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
-		set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
+	if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
+		set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
 		bitmap->allclean = 0;
 	}
 }
@@ -1321,7 +1334,7 @@
 	sector_t csize;
 	int err;
 
-	err = bitmap_checkpage(bitmap, page, create);
+	err = bitmap_checkpage(bitmap, page, create, 0);
 
 	if (bitmap->bp[page].hijacked ||
 	    bitmap->bp[page].map == NULL)
@@ -1594,6 +1607,27 @@
 }
 EXPORT_SYMBOL(bitmap_cond_end_sync);
 
+void bitmap_sync_with_cluster(struct mddev *mddev,
+			      sector_t old_lo, sector_t old_hi,
+			      sector_t new_lo, sector_t new_hi)
+{
+	struct bitmap *bitmap = mddev->bitmap;
+	sector_t sector, blocks = 0;
+
+	for (sector = old_lo; sector < new_lo; ) {
+		bitmap_end_sync(bitmap, sector, &blocks, 0);
+		sector += blocks;
+	}
+	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");
+
+	for (sector = old_hi; sector < new_hi; ) {
+		bitmap_start_sync(bitmap, sector, &blocks, 0);
+		sector += blocks;
+	}
+	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
+}
+EXPORT_SYMBOL(bitmap_sync_with_cluster);
+
 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
 {
 	/* For each chunk covered by any of these sectors, set the
@@ -1814,6 +1848,9 @@
 	if (!bitmap)
 		goto out;
 
+	if (mddev_is_clustered(mddev))
+		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
+
 	/* Clear out old bitmap info first:  Either there is none, or we
 	 * are resuming after someone else has possibly changed things,
 	 * so we should forget old cached info.
@@ -1890,14 +1927,14 @@
 
 	if (clear_bits) {
 		bitmap_update_sb(bitmap);
-		/* Setting this for the ev_page should be enough.
-		 * And we do not require both write_all and PAGE_DIRT either
-		 */
+		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
+		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
 		for (i = 0; i < bitmap->storage.file_pages; i++)
-			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
-		bitmap_write_all(bitmap);
+			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
+				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
 		bitmap_unplug(bitmap);
 	}
+	bitmap_unplug(mddev->bitmap);
 	*low = lo;
 	*high = hi;
 err:
@@ -2032,6 +2069,35 @@
 		     chunks << chunkshift);
 
 	spin_lock_irq(&bitmap->counts.lock);
+	/* For cluster raid, need to pre-allocate bitmap */
+	if (mddev_is_clustered(bitmap->mddev)) {
+		unsigned long page;
+		for (page = 0; page < pages; page++) {
+			ret = bitmap_checkpage(&bitmap->counts, page, 1, 1);
+			if (ret) {
+				unsigned long k;
+
+				/* deallocate the page memory */
+				for (k = 0; k < page; k++) {
+					kfree(new_bp[k].map);
+				}
+
+				/* restore some fields from old_counts */
+				bitmap->counts.bp = old_counts.bp;
+				bitmap->counts.pages = old_counts.pages;
+				bitmap->counts.missing_pages = old_counts.pages;
+				bitmap->counts.chunkshift = old_counts.chunkshift;
+				bitmap->counts.chunks = old_counts.chunks;
+				bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
+									     BITMAP_BLOCK_SHIFT);
+				blocks = old_counts.chunks << old_counts.chunkshift;
+				pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
+				break;
+			} else
+				bitmap->counts.bp[page].count += 1;
+		}
+	}
+
 	for (block = 0; block < blocks; ) {
 		bitmap_counter_t *bmc_old, *bmc_new;
 		int set;
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 5e3fcd6..5b6dd63 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -258,6 +258,9 @@
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
 void bitmap_close_sync(struct bitmap *bitmap);
 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
+void bitmap_sync_with_cluster(struct mddev *mddev,
+			      sector_t old_lo, sector_t old_hi,
+			      sector_t new_lo, sector_t new_hi);
 
 void bitmap_unplug(struct bitmap *bitmap);
 void bitmap_daemon_work(struct mddev *mddev);
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index dd97d42..41573f1 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -61,6 +61,10 @@
  * the lock.
  */
 #define		MD_CLUSTER_SEND_LOCKED_ALREADY		5
+/* Messages should only be received after the node has joined the cluster
+ * and set up all the related info, such as bitmap and personality */
+#define		MD_CLUSTER_ALREADY_IN_CLUSTER		6
+#define		MD_CLUSTER_PENDING_RECV_EVENT		7
 
 
 struct md_cluster_info {
@@ -85,6 +89,9 @@
 	struct completion newdisk_completion;
 	wait_queue_head_t wait;
 	unsigned long state;
+	/* record the region in RESYNCING message */
+	sector_t sync_low;
+	sector_t sync_hi;
 };
 
 enum msg_type {
@@ -284,11 +291,14 @@
 			goto dlm_unlock;
 		}
 		if (hi > 0) {
-			/* TODO:Wait for current resync to get over */
-			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 			if (lo < mddev->recovery_cp)
 				mddev->recovery_cp = lo;
-			md_check_recovery(mddev);
+			/* wake up thread to continue resync in case resync
+			 * is not finished */
+			if (mddev->recovery_cp != MaxSector) {
+			    set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+			    md_wakeup_thread(mddev->thread);
+			}
 		}
 dlm_unlock:
 		dlm_unlock_sync(bm_lockres);
@@ -370,8 +380,12 @@
 	struct dlm_lock_resource *res = arg;
 	struct md_cluster_info *cinfo = res->mddev->cluster_info;
 
-	if (mode == DLM_LOCK_EX)
-		md_wakeup_thread(cinfo->recv_thread);
+	if (mode == DLM_LOCK_EX) {
+		if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
+			md_wakeup_thread(cinfo->recv_thread);
+		else
+			set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
+	}
 }
 
 static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
@@ -408,6 +422,30 @@
 		md_wakeup_thread(mddev->thread);
 		return;
 	}
+
+	/*
+	 * The bitmaps are not the same on different nodes.
+	 * If RESYNCING is happening on one node, then
+	 * the node which received the RESYNCING message
+	 * will probably perform resync of the region
+	 * [lo, hi] again, so we can reduce resync time
+	 * a lot if we ensure that the bitmaps on
+	 * different nodes match up well.
+	 *
+	 * sync_low/hi is used to record the region which
+	 * arrived in the previous RESYNCING message.
+	 *
+	 * Call bitmap_sync_with_cluster to clear
+	 * NEEDED_MASK and set RESYNC_MASK since the
+	 * resync thread is running on another node,
+	 * so we don't need to do the resync again
+	 * for the same section */
+	bitmap_sync_with_cluster(mddev, cinfo->sync_low,
+					cinfo->sync_hi,
+					lo, hi);
+	cinfo->sync_low = lo;
+	cinfo->sync_hi = hi;
+
 	s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
 	if (!s)
 		return;
@@ -482,11 +520,13 @@
 			__func__, __LINE__, le32_to_cpu(msg->raid_slot));
 }
 
-static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
+static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 {
+	int ret = 0;
+
 	if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot),
 		"node %d received it's own msg\n", le32_to_cpu(msg->slot)))
-		return;
+		return -1;
 	switch (le32_to_cpu(msg->type)) {
 	case METADATA_UPDATED:
 		process_metadata_update(mddev, msg);
@@ -509,9 +549,11 @@
 		__recover_slot(mddev, le32_to_cpu(msg->slot));
 		break;
 	default:
+		ret = -1;
 		pr_warn("%s:%d Received unknown message from %d\n",
 			__func__, __LINE__, msg->slot);
 	}
+	return ret;
 }
 
 /*
@@ -535,7 +577,9 @@
 
 	/* read lvb and wake up thread to process this message_lockres */
 	memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
-	process_recvd_msg(thread->mddev, &msg);
+	ret = process_recvd_msg(thread->mddev, &msg);
+	if (ret)
+		goto out;
 
 	/*release CR on ack_lockres*/
 	ret = dlm_unlock_sync(ack_lockres);
@@ -549,6 +593,7 @@
 	ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
 	if (unlikely(ret != 0))
 		pr_info("lock CR on ack failed return %d\n", ret);
+out:
 	/*release CR on message_lockres*/
 	ret = dlm_unlock_sync(message_lockres);
 	if (unlikely(ret != 0))
@@ -778,17 +823,24 @@
 	cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
 	if (!cinfo->token_lockres)
 		goto err;
-	cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
-	if (!cinfo->ack_lockres)
-		goto err;
 	cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
 	if (!cinfo->no_new_dev_lockres)
 		goto err;
 
+	ret = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
+	if (ret) {
+		ret = -EAGAIN;
+		pr_err("md-cluster: can't join cluster to avoid lock issue\n");
+		goto err;
+	}
+	cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
+	if (!cinfo->ack_lockres)
+		goto err;
 	/* get sync CR lock on ACK. */
 	if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
 		pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
 				ret);
+	dlm_unlock_sync(cinfo->token_lockres);
 	/* get sync CR lock on no-new-dev. */
 	if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
 		pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
@@ -809,12 +861,10 @@
 	if (!cinfo->resync_lockres)
 		goto err;
 
-	ret = gather_all_resync_info(mddev, nodes);
-	if (ret)
-		goto err;
-
 	return 0;
 err:
+	md_unregister_thread(&cinfo->recovery_thread);
+	md_unregister_thread(&cinfo->recv_thread);
 	lockres_free(cinfo->message_lockres);
 	lockres_free(cinfo->token_lockres);
 	lockres_free(cinfo->ack_lockres);
@@ -828,6 +878,19 @@
 	return ret;
 }
 
+static void load_bitmaps(struct mddev *mddev, int total_slots)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	/* load all the nodes' bitmap info for resync */
+	if (gather_all_resync_info(mddev, total_slots))
+		pr_err("md-cluster: failed to gather all resyn infos\n");
+	set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
+	/* wake up recv thread in case something needs to be handled */
+	if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
+		md_wakeup_thread(cinfo->recv_thread);
+}
+
 static void resync_bitmap(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -937,7 +1000,6 @@
 static int resync_start(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
-	cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
 	return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
 }
 
@@ -967,7 +1029,6 @@
 static int resync_finish(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
-	cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
 	dlm_unlock_sync(cinfo->resync_lockres);
 	return resync_info_update(mddev, 0, 0);
 }
@@ -1171,6 +1232,7 @@
 	.add_new_disk_cancel = add_new_disk_cancel,
 	.new_disk_ack = new_disk_ack,
 	.remove_disk = remove_disk,
+	.load_bitmaps = load_bitmaps,
 	.gather_bitmaps = gather_bitmaps,
 	.lock_all_bitmaps = lock_all_bitmaps,
 	.unlock_all_bitmaps = unlock_all_bitmaps,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 45ce6c9..e765499 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -23,6 +23,7 @@
 	void (*add_new_disk_cancel)(struct mddev *mddev);
 	int (*new_disk_ack)(struct mddev *mddev, bool ack);
 	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
+	void (*load_bitmaps)(struct mddev *mddev, int total_slots);
 	int (*gather_bitmaps)(struct md_rdev *rdev);
 	int (*lock_all_bitmaps)(struct mddev *mddev);
 	void (*unlock_all_bitmaps)(struct mddev *mddev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c9a475c..866825f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -307,7 +307,7 @@
  */
 void mddev_suspend(struct mddev *mddev)
 {
-	WARN_ON_ONCE(current == mddev->thread->tsk);
+	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
 	if (mddev->suspended++)
 		return;
 	synchronize_rcu();
@@ -2291,19 +2291,24 @@
 		return;
 	}
 
+repeat:
 	if (mddev_is_clustered(mddev)) {
 		if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
 			force_change = 1;
+		if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+			nospares = 1;
 		ret = md_cluster_ops->metadata_update_start(mddev);
 		/* Has someone else has updated the sb */
 		if (!does_sb_need_changing(mddev)) {
 			if (ret == 0)
 				md_cluster_ops->metadata_update_cancel(mddev);
-			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+			bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+							 BIT(MD_CHANGE_DEVS) |
+							 BIT(MD_CHANGE_CLEAN));
 			return;
 		}
 	}
-repeat:
+
 	/* First make sure individual recovery_offsets are correct */
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk >= 0 &&
@@ -2430,15 +2435,14 @@
 	md_super_wait(mddev);
 	/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
 
-	spin_lock(&mddev->lock);
+	if (mddev_is_clustered(mddev) && ret == 0)
+		md_cluster_ops->metadata_update_finish(mddev);
+
 	if (mddev->in_sync != sync_req ||
-	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
+	    !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+			       BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
 		/* have to write it out again */
-		spin_unlock(&mddev->lock);
 		goto repeat;
-	}
-	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-	spin_unlock(&mddev->lock);
 	wake_up(&mddev->sb_wait);
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -2452,9 +2456,6 @@
 		clear_bit(BlockedBadBlocks, &rdev->flags);
 		wake_up(&rdev->blocked_wait);
 	}
-
-	if (mddev_is_clustered(mddev) && ret == 0)
-		md_cluster_ops->metadata_update_finish(mddev);
 }
 EXPORT_SYMBOL(md_update_sb);
 
@@ -4816,6 +4817,10 @@
 	if (err)
 		return err;
 
+	/* cluster raid doesn't support changing array_sectors */
+	if (mddev_is_clustered(mddev))
+		return -EINVAL;
+
 	if (strncmp(buf, "default", 7) == 0) {
 		if (mddev->pers)
 			sectors = mddev->pers->size(mddev, 0, 0);
@@ -6437,6 +6442,10 @@
 	int rv;
 	int fit = (num_sectors == 0);
 
+	/* cluster raid doesn't support updating size */
+	if (mddev_is_clustered(mddev))
+		return -EINVAL;
+
 	if (mddev->pers->resize == NULL)
 		return -EINVAL;
 	/* The "num_sectors" is the number of sectors of each device that
@@ -7785,7 +7794,7 @@
 	struct md_rdev *rdev;
 	char *desc, *action = NULL;
 	struct blk_plug plug;
-	bool cluster_resync_finished = false;
+	int ret;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7795,6 +7804,19 @@
 		return;
 	}
 
+	if (mddev_is_clustered(mddev)) {
+		ret = md_cluster_ops->resync_start(mddev);
+		if (ret)
+			goto skip;
+
+		if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+			test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
+			test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
+		     && ((unsigned long long)mddev->curr_resync_completed
+			 < (unsigned long long)mddev->resync_max_sectors))
+			goto skip;
+	}
+
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
 			desc = "data-check";
@@ -8089,11 +8111,6 @@
 		mddev->curr_resync_completed = mddev->curr_resync;
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	}
-	/* tell personality and other nodes that we are finished */
-	if (mddev_is_clustered(mddev)) {
-		md_cluster_ops->resync_finish(mddev);
-		cluster_resync_finished = true;
-	}
 	mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
 	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
@@ -8130,12 +8147,18 @@
 		}
 	}
  skip:
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-
 	if (mddev_is_clustered(mddev) &&
-	    test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-	    !cluster_resync_finished)
+	    ret == 0) {
+		/* set CHANGE_PENDING here since another update may
+		 * be needed, so that other nodes are informed */
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
+		md_wakeup_thread(mddev->thread);
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 		md_cluster_ops->resync_finish(mddev);
+	} else
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8226,18 +8249,9 @@
 	struct mddev *mddev = container_of(ws, struct mddev, del_work);
 	int ret = 0;
 
-	if (mddev_is_clustered(mddev)) {
-		ret = md_cluster_ops->resync_start(mddev);
-		if (ret) {
-			mddev->sync_thread = NULL;
-			goto out;
-		}
-	}
-
 	mddev->sync_thread = md_register_thread(md_do_sync,
 						mddev,
 						"resync");
-out:
 	if (!mddev->sync_thread) {
 		if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
 			printk(KERN_ERR "%s: could not start resync"
@@ -8536,6 +8550,7 @@
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		       int is_new)
 {
+	struct mddev *mddev = rdev->mddev;
 	int rv;
 	if (is_new)
 		s += rdev->new_data_offset;
@@ -8545,8 +8560,8 @@
 	if (rv == 0) {
 		/* Make sure they get written out promptly */
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
-		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(rdev->mddev->thread);
 		return 1;
 	} else
@@ -8680,6 +8695,11 @@
 				ret = remove_and_add_spares(mddev, rdev2);
 				pr_info("Activated spare: %s\n",
 						bdevname(rdev2->bdev,b));
+				/* wake up mddev->thread here, so the array can
+				 * perform resync with the newly activated disk */
+				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+				md_wakeup_thread(mddev->thread);
+
 			}
 			/* device faulty
 			 * We just want to do the minimum to mark the disk
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a7f2b9c..c7c8cde 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1474,8 +1474,8 @@
 	 * if recovery is running, make sure it aborts.
 	 */
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	printk(KERN_ALERT
 	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid1:%s: Operation continuing on %d devices.\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e3fd725..c7de2a5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1102,8 +1102,8 @@
 		bio->bi_iter.bi_sector < conf->reshape_progress))) {
 		/* Need to update reshape_position in metadata */
 		mddev->reshape_position = conf->reshape_progress;
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -1591,8 +1591,8 @@
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	printk(KERN_ALERT
 	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
@@ -3782,8 +3782,10 @@
 			return ret;
 	}
 	md_set_array_sectors(mddev, size);
-	set_capacity(mddev->gendisk, mddev->array_sectors);
-	revalidate_disk(mddev->gendisk);
+	if (mddev->queue) {
+		set_capacity(mddev->gendisk, mddev->array_sectors);
+		revalidate_disk(mddev->gendisk);
+	}
 	if (sectors > mddev->dev_sectors &&
 	    mddev->recovery_cp > oldsize) {
 		mddev->recovery_cp = oldsize;
@@ -4593,8 +4595,10 @@
 			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		}
 		mddev->resync_max_sectors = size;
-		set_capacity(mddev->gendisk, mddev->array_sectors);
-		revalidate_disk(mddev->gendisk);
+		if (mddev->queue) {
+			set_capacity(mddev->gendisk, mddev->array_sectors);
+			revalidate_disk(mddev->gendisk);
+		}
 	} else {
 		int d;
 		for (d = conf->geo.raid_disks ;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 26f1497..e889e2d 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -712,8 +712,8 @@
 	 * in_teardown check workaround this issue.
 	 */
 	if (!log->in_teardown) {
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e48c262c..8959e6d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2514,8 +2514,8 @@
 
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	printk(KERN_ALERT
 	       "md/raid:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid:%s: Operation continuing on %d devices.\n",
@@ -7572,8 +7572,10 @@
 
 		if (mddev->delta_disks > 0) {
 			md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
-			set_capacity(mddev->gendisk, mddev->array_sectors);
-			revalidate_disk(mddev->gendisk);
+			if (mddev->queue) {
+				set_capacity(mddev->gendisk, mddev->array_sectors);
+				revalidate_disk(mddev->gendisk);
+			}
 		} else {
 			int d;
 			spin_lock_irq(&conf->device_lock);
diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c
index 7688360..5557ef8 100644
--- a/drivers/media/dvb-frontends/m88ds3103.c
+++ b/drivers/media/dvb-frontends/m88ds3103.c
@@ -1251,9 +1251,9 @@
 	i2c_unregister_device(client);
 }
 
-static int m88ds3103_select(struct i2c_adapter *adap, void *mux_priv, u32 chan)
+static int m88ds3103_select(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct m88ds3103_dev *dev = mux_priv;
+	struct m88ds3103_dev *dev = i2c_mux_priv(muxc);
 	struct i2c_client *client = dev->client;
 	int ret;
 	struct i2c_msg msg = {
@@ -1374,7 +1374,7 @@
 
 	dev_dbg(&client->dev, "\n");
 
-	return dev->i2c_adapter;
+	return dev->muxc->adapter[0];
 }
 
 static int m88ds3103_probe(struct i2c_client *client,
@@ -1467,13 +1467,16 @@
 		goto err_kfree;
 
 	/* create mux i2c adapter for tuner */
-	dev->i2c_adapter = i2c_add_mux_adapter(client->adapter, &client->dev,
-					       dev, 0, 0, 0, m88ds3103_select,
-					       NULL);
-	if (dev->i2c_adapter == NULL) {
+	dev->muxc = i2c_mux_alloc(client->adapter, &client->dev, 1, 0, 0,
+				  m88ds3103_select, NULL);
+	if (!dev->muxc) {
 		ret = -ENOMEM;
 		goto err_kfree;
 	}
+	dev->muxc->priv = dev;
+	ret = i2c_mux_add_adapter(dev->muxc, 0, 0, 0);
+	if (ret)
+		goto err_kfree;
 
 	/* create dvb_frontend */
 	memcpy(&dev->fe.ops, &m88ds3103_ops, sizeof(struct dvb_frontend_ops));
@@ -1502,7 +1505,7 @@
 
 	dev_dbg(&client->dev, "\n");
 
-	i2c_del_mux_adapter(dev->i2c_adapter);
+	i2c_mux_del_adapters(dev->muxc);
 
 	kfree(dev);
 	return 0;
diff --git a/drivers/media/dvb-frontends/m88ds3103_priv.h b/drivers/media/dvb-frontends/m88ds3103_priv.h
index 651e005..d78e467 100644
--- a/drivers/media/dvb-frontends/m88ds3103_priv.h
+++ b/drivers/media/dvb-frontends/m88ds3103_priv.h
@@ -42,7 +42,7 @@
 	enum fe_status fe_status;
 	u32 dvbv3_ber; /* for old DVBv3 API read_ber */
 	bool warm; /* FW running */
-	struct i2c_adapter *i2c_adapter;
+	struct i2c_mux_core *muxc;
 	/* auto detect chip id to do different config */
 	u8 chip_id;
 	/* main mclk is calculated for M88RS6000 dynamically */
diff --git a/drivers/media/dvb-frontends/rtl2830.c b/drivers/media/dvb-frontends/rtl2830.c
index 3f96429..d25d1e0 100644
--- a/drivers/media/dvb-frontends/rtl2830.c
+++ b/drivers/media/dvb-frontends/rtl2830.c
@@ -677,9 +677,9 @@
  * adapter lock is already taken by tuner driver.
  * Gate is closed automatically after single I2C transfer.
  */
-static int rtl2830_select(struct i2c_adapter *adap, void *mux_priv, u32 chan_id)
+static int rtl2830_select(struct i2c_mux_core *muxc, u32 chan_id)
 {
-	struct i2c_client *client = mux_priv;
+	struct i2c_client *client = i2c_mux_priv(muxc);
 	struct rtl2830_dev *dev = i2c_get_clientdata(client);
 	int ret;
 
@@ -712,7 +712,7 @@
 
 	dev_dbg(&client->dev, "\n");
 
-	return dev->adapter;
+	return dev->muxc->adapter[0];
 }
 
 /*
@@ -865,12 +865,16 @@
 		goto err_regmap_exit;
 
 	/* create muxed i2c adapter for tuner */
-	dev->adapter = i2c_add_mux_adapter(client->adapter, &client->dev,
-			client, 0, 0, 0, rtl2830_select, NULL);
-	if (dev->adapter == NULL) {
-		ret = -ENODEV;
+	dev->muxc = i2c_mux_alloc(client->adapter, &client->dev, 1, 0, 0,
+				  rtl2830_select, NULL);
+	if (!dev->muxc) {
+		ret = -ENOMEM;
 		goto err_regmap_exit;
 	}
+	dev->muxc->priv = client;
+	ret = i2c_mux_add_adapter(dev->muxc, 0, 0, 0);
+	if (ret)
+		goto err_regmap_exit;
 
 	/* create dvb frontend */
 	memcpy(&dev->fe.ops, &rtl2830_ops, sizeof(dev->fe.ops));
@@ -903,7 +907,7 @@
 	/* stop statistics polling */
 	cancel_delayed_work_sync(&dev->stat_work);
 
-	i2c_del_mux_adapter(dev->adapter);
+	i2c_mux_del_adapters(dev->muxc);
 	regmap_exit(dev->regmap);
 	kfree(dev);
 
diff --git a/drivers/media/dvb-frontends/rtl2830_priv.h b/drivers/media/dvb-frontends/rtl2830_priv.h
index cf793f3..da49095 100644
--- a/drivers/media/dvb-frontends/rtl2830_priv.h
+++ b/drivers/media/dvb-frontends/rtl2830_priv.h
@@ -29,7 +29,7 @@
 	struct rtl2830_platform_data *pdata;
 	struct i2c_client *client;
 	struct regmap *regmap;
-	struct i2c_adapter *adapter;
+	struct i2c_mux_core *muxc;
 	struct dvb_frontend fe;
 	bool sleeping;
 	unsigned long filters;
diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c
index 7c96f76..bfb6bee 100644
--- a/drivers/media/dvb-frontends/rtl2832.c
+++ b/drivers/media/dvb-frontends/rtl2832.c
@@ -153,43 +153,6 @@
 	[DVBT_REG_4MSEL]	= {0x013,  0, 0},
 };
 
-/* Our regmap is bypassing I2C adapter lock, thus we do it! */
-static int rtl2832_bulk_write(struct i2c_client *client, unsigned int reg,
-			      const void *val, size_t val_count)
-{
-	struct rtl2832_dev *dev = i2c_get_clientdata(client);
-	int ret;
-
-	i2c_lock_adapter(client->adapter);
-	ret = regmap_bulk_write(dev->regmap, reg, val, val_count);
-	i2c_unlock_adapter(client->adapter);
-	return ret;
-}
-
-static int rtl2832_update_bits(struct i2c_client *client, unsigned int reg,
-			       unsigned int mask, unsigned int val)
-{
-	struct rtl2832_dev *dev = i2c_get_clientdata(client);
-	int ret;
-
-	i2c_lock_adapter(client->adapter);
-	ret = regmap_update_bits(dev->regmap, reg, mask, val);
-	i2c_unlock_adapter(client->adapter);
-	return ret;
-}
-
-static int rtl2832_bulk_read(struct i2c_client *client, unsigned int reg,
-			     void *val, size_t val_count)
-{
-	struct rtl2832_dev *dev = i2c_get_clientdata(client);
-	int ret;
-
-	i2c_lock_adapter(client->adapter);
-	ret = regmap_bulk_read(dev->regmap, reg, val, val_count);
-	i2c_unlock_adapter(client->adapter);
-	return ret;
-}
-
 static int rtl2832_rd_demod_reg(struct rtl2832_dev *dev, int reg, u32 *val)
 {
 	struct i2c_client *client = dev->client;
@@ -204,7 +167,7 @@
 	len = (msb >> 3) + 1;
 	mask = REG_MASK(msb - lsb);
 
-	ret = rtl2832_bulk_read(client, reg_start_addr, reading, len);
+	ret = regmap_bulk_read(dev->regmap, reg_start_addr, reading, len);
 	if (ret)
 		goto err;
 
@@ -234,7 +197,7 @@
 	len = (msb >> 3) + 1;
 	mask = REG_MASK(msb - lsb);
 
-	ret = rtl2832_bulk_read(client, reg_start_addr, reading, len);
+	ret = regmap_bulk_read(dev->regmap, reg_start_addr, reading, len);
 	if (ret)
 		goto err;
 
@@ -248,7 +211,7 @@
 	for (i = 0; i < len; i++)
 		writing[i] = (writing_tmp >> ((len - 1 - i) * 8)) & 0xff;
 
-	ret = rtl2832_bulk_write(client, reg_start_addr, writing, len);
+	ret = regmap_bulk_write(dev->regmap, reg_start_addr, writing, len);
 	if (ret)
 		goto err;
 
@@ -525,7 +488,8 @@
 	}
 
 	for (j = 0; j < sizeof(bw_params[0]); j++) {
-		ret = rtl2832_bulk_write(client, 0x11c + j, &bw_params[i][j], 1);
+		ret = regmap_bulk_write(dev->regmap,
+					0x11c + j, &bw_params[i][j], 1);
 		if (ret)
 			goto err;
 	}
@@ -581,11 +545,11 @@
 	if (dev->sleeping)
 		return 0;
 
-	ret = rtl2832_bulk_read(client, 0x33c, buf, 2);
+	ret = regmap_bulk_read(dev->regmap, 0x33c, buf, 2);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_bulk_read(client, 0x351, &buf[2], 1);
+	ret = regmap_bulk_read(dev->regmap, 0x351, &buf[2], 1);
 	if (ret)
 		goto err;
 
@@ -716,7 +680,7 @@
 	/* signal strength */
 	if (dev->fe_status & FE_HAS_SIGNAL) {
 		/* read digital AGC */
-		ret = rtl2832_bulk_read(client, 0x305, &u8tmp, 1);
+		ret = regmap_bulk_read(dev->regmap, 0x305, &u8tmp, 1);
 		if (ret)
 			goto err;
 
@@ -742,7 +706,7 @@
 			{87659938, 87659938, 87885178, 88241743},
 		};
 
-		ret = rtl2832_bulk_read(client, 0x33c, &u8tmp, 1);
+		ret = regmap_bulk_read(dev->regmap, 0x33c, &u8tmp, 1);
 		if (ret)
 			goto err;
 
@@ -754,7 +718,7 @@
 		if (hierarchy > HIERARCHY_NUM - 1)
 			goto err;
 
-		ret = rtl2832_bulk_read(client, 0x40c, buf, 2);
+		ret = regmap_bulk_read(dev->regmap, 0x40c, buf, 2);
 		if (ret)
 			goto err;
 
@@ -775,7 +739,7 @@
 
 	/* BER */
 	if (dev->fe_status & FE_HAS_LOCK) {
-		ret = rtl2832_bulk_read(client, 0x34e, buf, 2);
+		ret = regmap_bulk_read(dev->regmap, 0x34e, buf, 2);
 		if (ret)
 			goto err;
 
@@ -825,8 +789,6 @@
 
 /*
  * I2C gate/mux/repeater logic
- * We must use unlocked __i2c_transfer() here (through regmap) because of I2C
- * adapter lock is already taken by tuner driver.
  * There is delay mechanism to avoid unneeded I2C gate open / close. Gate close
  * is delayed here a little bit in order to see if there is sequence of I2C
  * messages sent to same I2C bus.
@@ -838,7 +800,7 @@
 	int ret;
 
 	/* close gate */
-	ret = rtl2832_update_bits(dev->client, 0x101, 0x08, 0x00);
+	ret = regmap_update_bits(dev->regmap, 0x101, 0x08, 0x00);
 	if (ret)
 		goto err;
 
@@ -847,19 +809,16 @@
 	dev_dbg(&client->dev, "failed=%d\n", ret);
 }
 
-static int rtl2832_select(struct i2c_adapter *adap, void *mux_priv, u32 chan_id)
+static int rtl2832_select(struct i2c_mux_core *muxc, u32 chan_id)
 {
-	struct rtl2832_dev *dev = mux_priv;
+	struct rtl2832_dev *dev = i2c_mux_priv(muxc);
 	struct i2c_client *client = dev->client;
 	int ret;
 
 	/* terminate possible gate closing */
 	cancel_delayed_work(&dev->i2c_gate_work);
 
-	/*
-	 * I2C adapter lock is already taken and due to that we will use
-	 * regmap_update_bits() which does not lock again I2C adapter.
-	 */
+	/* open gate */
 	ret = regmap_update_bits(dev->regmap, 0x101, 0x08, 0x08);
 	if (ret)
 		goto err;
@@ -870,10 +829,9 @@
 	return ret;
 }
 
-static int rtl2832_deselect(struct i2c_adapter *adap, void *mux_priv,
-			    u32 chan_id)
+static int rtl2832_deselect(struct i2c_mux_core *muxc, u32 chan_id)
 {
-	struct rtl2832_dev *dev = mux_priv;
+	struct rtl2832_dev *dev = i2c_mux_priv(muxc);
 
 	schedule_delayed_work(&dev->i2c_gate_work, usecs_to_jiffies(100));
 	return 0;
@@ -932,120 +890,6 @@
 	return false;
 }
 
-/*
- * We implement own I2C access routines for regmap in order to get manual access
- * to I2C adapter lock, which is needed for I2C mux adapter.
- */
-static int rtl2832_regmap_read(void *context, const void *reg_buf,
-			       size_t reg_size, void *val_buf, size_t val_size)
-{
-	struct i2c_client *client = context;
-	int ret;
-	struct i2c_msg msg[2] = {
-		{
-			.addr = client->addr,
-			.flags = 0,
-			.len = reg_size,
-			.buf = (u8 *)reg_buf,
-		}, {
-			.addr = client->addr,
-			.flags = I2C_M_RD,
-			.len = val_size,
-			.buf = val_buf,
-		}
-	};
-
-	ret = __i2c_transfer(client->adapter, msg, 2);
-	if (ret != 2) {
-		dev_warn(&client->dev, "i2c reg read failed %d reg %02x\n",
-			 ret, *(u8 *)reg_buf);
-		if (ret >= 0)
-			ret = -EREMOTEIO;
-		return ret;
-	}
-	return 0;
-}
-
-static int rtl2832_regmap_write(void *context, const void *data, size_t count)
-{
-	struct i2c_client *client = context;
-	int ret;
-	struct i2c_msg msg[1] = {
-		{
-			.addr = client->addr,
-			.flags = 0,
-			.len = count,
-			.buf = (u8 *)data,
-		}
-	};
-
-	ret = __i2c_transfer(client->adapter, msg, 1);
-	if (ret != 1) {
-		dev_warn(&client->dev, "i2c reg write failed %d reg %02x\n",
-			 ret, *(u8 *)data);
-		if (ret >= 0)
-			ret = -EREMOTEIO;
-		return ret;
-	}
-	return 0;
-}
-
-static int rtl2832_regmap_gather_write(void *context, const void *reg,
-				       size_t reg_len, const void *val,
-				       size_t val_len)
-{
-	struct i2c_client *client = context;
-	int ret;
-	u8 buf[256];
-	struct i2c_msg msg[1] = {
-		{
-			.addr = client->addr,
-			.flags = 0,
-			.len = 1 + val_len,
-			.buf = buf,
-		}
-	};
-
-	buf[0] = *(u8 const *)reg;
-	memcpy(&buf[1], val, val_len);
-
-	ret = __i2c_transfer(client->adapter, msg, 1);
-	if (ret != 1) {
-		dev_warn(&client->dev, "i2c reg write failed %d reg %02x\n",
-			 ret, *(u8 const *)reg);
-		if (ret >= 0)
-			ret = -EREMOTEIO;
-		return ret;
-	}
-	return 0;
-}
-
-/*
- * FIXME: Hack. Implement own regmap locking in order to silence lockdep
- * recursive lock warning. That happens when regmap I2C client calls I2C mux
- * adapter, which leads demod I2C repeater enable via demod regmap. Operation
- * takes two regmap locks recursively - but those are different regmap instances
- * in a two different I2C drivers, so it is not deadlock. Proper fix is to make
- * regmap aware of lockdep.
- */
-static void rtl2832_regmap_lock(void *__dev)
-{
-	struct rtl2832_dev *dev = __dev;
-	struct i2c_client *client = dev->client;
-
-	dev_dbg(&client->dev, "\n");
-	mutex_lock(&dev->regmap_mutex);
-}
-
-static void rtl2832_regmap_unlock(void *__dev)
-{
-	struct rtl2832_dev *dev = __dev;
-	struct i2c_client *client = dev->client;
-
-	dev_dbg(&client->dev, "\n");
-	mutex_unlock(&dev->regmap_mutex);
-}
-
 static struct dvb_frontend *rtl2832_get_dvb_frontend(struct i2c_client *client)
 {
 	struct rtl2832_dev *dev = i2c_get_clientdata(client);
@@ -1059,7 +903,7 @@
 	struct rtl2832_dev *dev = i2c_get_clientdata(client);
 
 	dev_dbg(&client->dev, "\n");
-	return dev->i2c_adapter_tuner;
+	return dev->muxc->adapter[0];
 }
 
 static int rtl2832_slave_ts_ctrl(struct i2c_client *client, bool enable)
@@ -1073,29 +917,29 @@
 		ret = rtl2832_wr_demod_reg(dev, DVBT_SOFT_RST, 0x0);
 		if (ret)
 			goto err;
-		ret = rtl2832_bulk_write(client, 0x10c, "\x5f\xff", 2);
+		ret = regmap_bulk_write(dev->regmap, 0x10c, "\x5f\xff", 2);
 		if (ret)
 			goto err;
 		ret = rtl2832_wr_demod_reg(dev, DVBT_PIP_ON, 0x1);
 		if (ret)
 			goto err;
-		ret = rtl2832_bulk_write(client, 0x0bc, "\x18", 1);
+		ret = regmap_bulk_write(dev->regmap, 0x0bc, "\x18", 1);
 		if (ret)
 			goto err;
-		ret = rtl2832_bulk_write(client, 0x192, "\x7f\xf7\xff", 3);
+		ret = regmap_bulk_write(dev->regmap, 0x192, "\x7f\xf7\xff", 3);
 		if (ret)
 			goto err;
 	} else {
-		ret = rtl2832_bulk_write(client, 0x192, "\x00\x0f\xff", 3);
+		ret = regmap_bulk_write(dev->regmap, 0x192, "\x00\x0f\xff", 3);
 		if (ret)
 			goto err;
-		ret = rtl2832_bulk_write(client, 0x0bc, "\x08", 1);
+		ret = regmap_bulk_write(dev->regmap, 0x0bc, "\x08", 1);
 		if (ret)
 			goto err;
 		ret = rtl2832_wr_demod_reg(dev, DVBT_PIP_ON, 0x0);
 		if (ret)
 			goto err;
-		ret = rtl2832_bulk_write(client, 0x10c, "\x00\x00", 2);
+		ret = regmap_bulk_write(dev->regmap, 0x10c, "\x00\x00", 2);
 		if (ret)
 			goto err;
 		ret = rtl2832_wr_demod_reg(dev, DVBT_SOFT_RST, 0x1);
@@ -1124,7 +968,7 @@
 	else
 		u8tmp = 0x00;
 
-	ret = rtl2832_update_bits(client, 0x061, 0xc0, u8tmp);
+	ret = regmap_update_bits(dev->regmap, 0x061, 0xc0, u8tmp);
 	if (ret)
 		goto err;
 
@@ -1159,14 +1003,14 @@
 	buf[1] = (dev->filters >>  8) & 0xff;
 	buf[2] = (dev->filters >> 16) & 0xff;
 	buf[3] = (dev->filters >> 24) & 0xff;
-	ret = rtl2832_bulk_write(client, 0x062, buf, 4);
+	ret = regmap_bulk_write(dev->regmap, 0x062, buf, 4);
 	if (ret)
 		goto err;
 
 	/* add PID */
 	buf[0] = (pid >> 8) & 0xff;
 	buf[1] = (pid >> 0) & 0xff;
-	ret = rtl2832_bulk_write(client, 0x066 + 2 * index, buf, 2);
+	ret = regmap_bulk_write(dev->regmap, 0x066 + 2 * index, buf, 2);
 	if (ret)
 		goto err;
 
@@ -1184,12 +1028,6 @@
 	struct rtl2832_dev *dev;
 	int ret;
 	u8 tmp;
-	static const struct regmap_bus regmap_bus = {
-		.read = rtl2832_regmap_read,
-		.write = rtl2832_regmap_write,
-		.gather_write = rtl2832_regmap_gather_write,
-		.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
-	};
 	static const struct regmap_range_cfg regmap_range_cfg[] = {
 		{
 			.selector_reg     = 0x00,
@@ -1218,36 +1056,35 @@
 	dev->sleeping = true;
 	INIT_DELAYED_WORK(&dev->i2c_gate_work, rtl2832_i2c_gate_work);
 	/* create regmap */
-	mutex_init(&dev->regmap_mutex);
 	dev->regmap_config.reg_bits =  8,
 	dev->regmap_config.val_bits =  8,
-	dev->regmap_config.lock = rtl2832_regmap_lock,
-	dev->regmap_config.unlock = rtl2832_regmap_unlock,
-	dev->regmap_config.lock_arg = dev,
 	dev->regmap_config.volatile_reg = rtl2832_volatile_reg,
 	dev->regmap_config.max_register = 5 * 0x100,
 	dev->regmap_config.ranges = regmap_range_cfg,
 	dev->regmap_config.num_ranges = ARRAY_SIZE(regmap_range_cfg),
 	dev->regmap_config.cache_type = REGCACHE_NONE,
-	dev->regmap = regmap_init(&client->dev, &regmap_bus, client,
-				  &dev->regmap_config);
+	dev->regmap = regmap_init_i2c(client, &dev->regmap_config);
 	if (IS_ERR(dev->regmap)) {
 		ret = PTR_ERR(dev->regmap);
 		goto err_kfree;
 	}
 
 	/* check if the demod is there */
-	ret = rtl2832_bulk_read(client, 0x000, &tmp, 1);
+	ret = regmap_bulk_read(dev->regmap, 0x000, &tmp, 1);
 	if (ret)
 		goto err_regmap_exit;
 
 	/* create muxed i2c adapter for demod tuner bus */
-	dev->i2c_adapter_tuner = i2c_add_mux_adapter(i2c, &i2c->dev, dev,
-			0, 0, 0, rtl2832_select, rtl2832_deselect);
-	if (dev->i2c_adapter_tuner == NULL) {
-		ret = -ENODEV;
+	dev->muxc = i2c_mux_alloc(i2c, &i2c->dev, 1, 0, I2C_MUX_LOCKED,
+				  rtl2832_select, rtl2832_deselect);
+	if (!dev->muxc) {
+		ret = -ENOMEM;
 		goto err_regmap_exit;
 	}
+	dev->muxc->priv = dev;
+	ret = i2c_mux_add_adapter(dev->muxc, 0, 0, 0);
+	if (ret)
+		goto err_regmap_exit;
 
 	/* create dvb_frontend */
 	memcpy(&dev->fe.ops, &rtl2832_ops, sizeof(struct dvb_frontend_ops));
@@ -1259,9 +1096,7 @@
 	pdata->slave_ts_ctrl = rtl2832_slave_ts_ctrl;
 	pdata->pid_filter = rtl2832_pid_filter;
 	pdata->pid_filter_ctrl = rtl2832_pid_filter_ctrl;
-	pdata->bulk_read = rtl2832_bulk_read;
-	pdata->bulk_write = rtl2832_bulk_write;
-	pdata->update_bits = rtl2832_update_bits;
+	pdata->regmap = dev->regmap;
 
 	dev_info(&client->dev, "Realtek RTL2832 successfully attached\n");
 	return 0;
@@ -1282,7 +1117,7 @@
 
 	cancel_delayed_work_sync(&dev->i2c_gate_work);
 
-	i2c_del_mux_adapter(dev->i2c_adapter_tuner);
+	i2c_mux_del_adapters(dev->muxc);
 
 	regmap_exit(dev->regmap);
 
diff --git a/drivers/media/dvb-frontends/rtl2832.h b/drivers/media/dvb-frontends/rtl2832.h
index 6390af6..03c0de0 100644
--- a/drivers/media/dvb-frontends/rtl2832.h
+++ b/drivers/media/dvb-frontends/rtl2832.h
@@ -57,9 +57,7 @@
 	int (*pid_filter)(struct dvb_frontend *, u8, u16, int);
 	int (*pid_filter_ctrl)(struct dvb_frontend *, int);
 /* private: Register access for SDR module use only */
-	int (*bulk_read)(struct i2c_client *, unsigned int, void *, size_t);
-	int (*bulk_write)(struct i2c_client *, unsigned int, const void *, size_t);
-	int (*update_bits)(struct i2c_client *, unsigned int, unsigned int, unsigned int);
+	struct regmap *regmap;
 };
 
 #endif /* RTL2832_H */
diff --git a/drivers/media/dvb-frontends/rtl2832_priv.h b/drivers/media/dvb-frontends/rtl2832_priv.h
index 6b875f4..c1a8a69 100644
--- a/drivers/media/dvb-frontends/rtl2832_priv.h
+++ b/drivers/media/dvb-frontends/rtl2832_priv.h
@@ -33,10 +33,9 @@
 struct rtl2832_dev {
 	struct rtl2832_platform_data *pdata;
 	struct i2c_client *client;
-	struct mutex regmap_mutex;
 	struct regmap_config regmap_config;
 	struct regmap *regmap;
-	struct i2c_adapter *i2c_adapter_tuner;
+	struct i2c_mux_core *muxc;
 	struct dvb_frontend fe;
 	enum fe_status fe_status;
 	u64 post_bit_error_prev; /* for old DVBv3 read_ber() calculation */
diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.c b/drivers/media/dvb-frontends/rtl2832_sdr.c
index b860f02..47a480a 100644
--- a/drivers/media/dvb-frontends/rtl2832_sdr.c
+++ b/drivers/media/dvb-frontends/rtl2832_sdr.c
@@ -35,6 +35,7 @@
 #include <linux/platform_device.h>
 #include <linux/jiffies.h>
 #include <linux/math64.h>
+#include <linux/regmap.h>
 
 static bool rtl2832_sdr_emulated_fmt;
 module_param_named(emulated_formats, rtl2832_sdr_emulated_fmt, bool, 0644);
@@ -119,6 +120,7 @@
 	unsigned long flags;
 
 	struct platform_device *pdev;
+	struct regmap *regmap;
 
 	struct video_device vdev;
 	struct v4l2_device v4l2_dev;
@@ -163,47 +165,6 @@
 	unsigned long jiffies_next;
 };
 
-/* write multiple registers */
-static int rtl2832_sdr_wr_regs(struct rtl2832_sdr_dev *dev, u16 reg,
-		const u8 *val, int len)
-{
-	struct platform_device *pdev = dev->pdev;
-	struct rtl2832_sdr_platform_data *pdata = pdev->dev.platform_data;
-	struct i2c_client *client = pdata->i2c_client;
-
-	return pdata->bulk_write(client, reg, val, len);
-}
-
-#if 0
-/* read multiple registers */
-static int rtl2832_sdr_rd_regs(struct rtl2832_sdr_dev *dev, u16 reg, u8 *val,
-		int len)
-{
-	struct platform_device *pdev = dev->pdev;
-	struct rtl2832_sdr_platform_data *pdata = pdev->dev.platform_data;
-	struct i2c_client *client = pdata->i2c_client;
-
-	return pdata->bulk_read(client, reg, val, len);
-}
-#endif
-
-/* write single register */
-static int rtl2832_sdr_wr_reg(struct rtl2832_sdr_dev *dev, u16 reg, u8 val)
-{
-	return rtl2832_sdr_wr_regs(dev, reg, &val, 1);
-}
-
-/* write single register with mask */
-static int rtl2832_sdr_wr_reg_mask(struct rtl2832_sdr_dev *dev, u16 reg,
-		u8 val, u8 mask)
-{
-	struct platform_device *pdev = dev->pdev;
-	struct rtl2832_sdr_platform_data *pdata = pdev->dev.platform_data;
-	struct i2c_client *client = pdata->i2c_client;
-
-	return pdata->update_bits(client, reg, mask, val);
-}
-
 /* Private functions */
 static struct rtl2832_sdr_frame_buf *rtl2832_sdr_get_next_fill_buf(
 		struct rtl2832_sdr_dev *dev)
@@ -558,11 +519,11 @@
 
 	f_sr = dev->f_adc;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x13e, "\x00\x00", 2);
+	ret = regmap_bulk_write(dev->regmap, 0x13e, "\x00\x00", 2);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x115, "\x00\x00\x00\x00", 4);
+	ret = regmap_bulk_write(dev->regmap, 0x115, "\x00\x00\x00\x00", 4);
 	if (ret)
 		goto err;
 
@@ -588,7 +549,7 @@
 	buf[1] = (u32tmp >>  8) & 0xff;
 	buf[2] = (u32tmp >>  0) & 0xff;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x119, buf, 3);
+	ret = regmap_bulk_write(dev->regmap, 0x119, buf, 3);
 	if (ret)
 		goto err;
 
@@ -602,15 +563,15 @@
 		u8tmp2 = 0xcd; /* enable ADC I, ADC Q */
 	}
 
-	ret = rtl2832_sdr_wr_reg(dev, 0x1b1, u8tmp1);
+	ret = regmap_write(dev->regmap, 0x1b1, u8tmp1);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_reg(dev, 0x008, u8tmp2);
+	ret = regmap_write(dev->regmap, 0x008, u8tmp2);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_reg(dev, 0x006, 0x80);
+	ret = regmap_write(dev->regmap, 0x006, 0x80);
 	if (ret)
 		goto err;
 
@@ -621,168 +582,169 @@
 	buf[1] = (u32tmp >> 16) & 0xff;
 	buf[2] = (u32tmp >>  8) & 0xff;
 	buf[3] = (u32tmp >>  0) & 0xff;
-	ret = rtl2832_sdr_wr_regs(dev, 0x19f, buf, 4);
+	ret = regmap_bulk_write(dev->regmap, 0x19f, buf, 4);
 	if (ret)
 		goto err;
 
 	/* low-pass filter */
-	ret = rtl2832_sdr_wr_regs(dev, 0x11c,
-			"\xca\xdc\xd7\xd8\xe0\xf2\x0e\x35\x06\x50\x9c\x0d\x71\x11\x14\x71\x74\x19\x41\xa5",
-			20);
+	ret = regmap_bulk_write(dev->regmap, 0x11c,
+				"\xca\xdc\xd7\xd8\xe0\xf2\x0e\x35\x06\x50\x9c\x0d\x71\x11\x14\x71\x74\x19\x41\xa5",
+				20);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x017, "\x11\x10", 2);
+	ret = regmap_bulk_write(dev->regmap, 0x017, "\x11\x10", 2);
 	if (ret)
 		goto err;
 
 	/* mode */
-	ret = rtl2832_sdr_wr_regs(dev, 0x019, "\x05", 1);
+	ret = regmap_write(dev->regmap, 0x019, 0x05);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x01a, "\x1b\x16\x0d\x06\x01\xff", 6);
+	ret = regmap_bulk_write(dev->regmap, 0x01a,
+				"\x1b\x16\x0d\x06\x01\xff", 6);
 	if (ret)
 		goto err;
 
 	/* FSM */
-	ret = rtl2832_sdr_wr_regs(dev, 0x192, "\x00\xf0\x0f", 3);
+	ret = regmap_bulk_write(dev->regmap, 0x192, "\x00\xf0\x0f", 3);
 	if (ret)
 		goto err;
 
 	/* PID filter */
-	ret = rtl2832_sdr_wr_regs(dev, 0x061, "\x60", 1);
+	ret = regmap_write(dev->regmap, 0x061, 0x60);
 	if (ret)
 		goto err;
 
 	/* used RF tuner based settings */
 	switch (pdata->tuner) {
 	case RTL2832_SDR_TUNER_E4000:
-		ret = rtl2832_sdr_wr_regs(dev, 0x112, "\x5a", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x102, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x103, "\x5a", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c7, "\x30", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x104, "\xd0", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x105, "\xbe", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c8, "\x18", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x106, "\x35", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c9, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ca, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cb, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x107, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cd, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ce, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x108, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x109, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10a, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10b, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x011, "\xd4", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1e5, "\xf0", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1d9, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1db, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1dd, "\x14", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1de, "\xec", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1d8, "\x0c", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1e6, "\x02", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1d7, "\x09", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00d, "\x83", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x010, "\x49", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00d, "\x87", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00d, "\x85", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x013, "\x02", 1);
+		ret = regmap_write(dev->regmap, 0x112, 0x5a);
+		ret = regmap_write(dev->regmap, 0x102, 0x40);
+		ret = regmap_write(dev->regmap, 0x103, 0x5a);
+		ret = regmap_write(dev->regmap, 0x1c7, 0x30);
+		ret = regmap_write(dev->regmap, 0x104, 0xd0);
+		ret = regmap_write(dev->regmap, 0x105, 0xbe);
+		ret = regmap_write(dev->regmap, 0x1c8, 0x18);
+		ret = regmap_write(dev->regmap, 0x106, 0x35);
+		ret = regmap_write(dev->regmap, 0x1c9, 0x21);
+		ret = regmap_write(dev->regmap, 0x1ca, 0x21);
+		ret = regmap_write(dev->regmap, 0x1cb, 0x00);
+		ret = regmap_write(dev->regmap, 0x107, 0x40);
+		ret = regmap_write(dev->regmap, 0x1cd, 0x10);
+		ret = regmap_write(dev->regmap, 0x1ce, 0x10);
+		ret = regmap_write(dev->regmap, 0x108, 0x80);
+		ret = regmap_write(dev->regmap, 0x109, 0x7f);
+		ret = regmap_write(dev->regmap, 0x10a, 0x80);
+		ret = regmap_write(dev->regmap, 0x10b, 0x7f);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_write(dev->regmap, 0x011, 0xd4);
+		ret = regmap_write(dev->regmap, 0x1e5, 0xf0);
+		ret = regmap_write(dev->regmap, 0x1d9, 0x00);
+		ret = regmap_write(dev->regmap, 0x1db, 0x00);
+		ret = regmap_write(dev->regmap, 0x1dd, 0x14);
+		ret = regmap_write(dev->regmap, 0x1de, 0xec);
+		ret = regmap_write(dev->regmap, 0x1d8, 0x0c);
+		ret = regmap_write(dev->regmap, 0x1e6, 0x02);
+		ret = regmap_write(dev->regmap, 0x1d7, 0x09);
+		ret = regmap_write(dev->regmap, 0x00d, 0x83);
+		ret = regmap_write(dev->regmap, 0x010, 0x49);
+		ret = regmap_write(dev->regmap, 0x00d, 0x87);
+		ret = regmap_write(dev->regmap, 0x00d, 0x85);
+		ret = regmap_write(dev->regmap, 0x013, 0x02);
 		break;
 	case RTL2832_SDR_TUNER_FC0012:
 	case RTL2832_SDR_TUNER_FC0013:
-		ret = rtl2832_sdr_wr_regs(dev, 0x112, "\x5a", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x102, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x103, "\x5a", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c7, "\x2c", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x104, "\xcc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x105, "\xbe", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c8, "\x16", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x106, "\x35", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c9, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ca, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cb, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x107, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cd, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ce, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x108, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x109, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10a, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10b, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x011, "\xe9\xbf", 2);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1e5, "\xf0", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1d9, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1db, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1dd, "\x11", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1de, "\xef", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1d8, "\x0c", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1e6, "\x02", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1d7, "\x09", 1);
+		ret = regmap_write(dev->regmap, 0x112, 0x5a);
+		ret = regmap_write(dev->regmap, 0x102, 0x40);
+		ret = regmap_write(dev->regmap, 0x103, 0x5a);
+		ret = regmap_write(dev->regmap, 0x1c7, 0x2c);
+		ret = regmap_write(dev->regmap, 0x104, 0xcc);
+		ret = regmap_write(dev->regmap, 0x105, 0xbe);
+		ret = regmap_write(dev->regmap, 0x1c8, 0x16);
+		ret = regmap_write(dev->regmap, 0x106, 0x35);
+		ret = regmap_write(dev->regmap, 0x1c9, 0x21);
+		ret = regmap_write(dev->regmap, 0x1ca, 0x21);
+		ret = regmap_write(dev->regmap, 0x1cb, 0x00);
+		ret = regmap_write(dev->regmap, 0x107, 0x40);
+		ret = regmap_write(dev->regmap, 0x1cd, 0x10);
+		ret = regmap_write(dev->regmap, 0x1ce, 0x10);
+		ret = regmap_write(dev->regmap, 0x108, 0x80);
+		ret = regmap_write(dev->regmap, 0x109, 0x7f);
+		ret = regmap_write(dev->regmap, 0x10a, 0x80);
+		ret = regmap_write(dev->regmap, 0x10b, 0x7f);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_bulk_write(dev->regmap, 0x011, "\xe9\xbf", 2);
+		ret = regmap_write(dev->regmap, 0x1e5, 0xf0);
+		ret = regmap_write(dev->regmap, 0x1d9, 0x00);
+		ret = regmap_write(dev->regmap, 0x1db, 0x00);
+		ret = regmap_write(dev->regmap, 0x1dd, 0x11);
+		ret = regmap_write(dev->regmap, 0x1de, 0xef);
+		ret = regmap_write(dev->regmap, 0x1d8, 0x0c);
+		ret = regmap_write(dev->regmap, 0x1e6, 0x02);
+		ret = regmap_write(dev->regmap, 0x1d7, 0x09);
 		break;
 	case RTL2832_SDR_TUNER_R820T:
 	case RTL2832_SDR_TUNER_R828D:
-		ret = rtl2832_sdr_wr_regs(dev, 0x112, "\x5a", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x102, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x115, "\x01", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x103, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c7, "\x24", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x104, "\xcc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x105, "\xbe", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c8, "\x14", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x106, "\x35", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c9, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ca, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cb, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x107, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cd, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ce, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x108, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x109, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10a, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10b, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x011, "\xf4", 1);
+		ret = regmap_write(dev->regmap, 0x112, 0x5a);
+		ret = regmap_write(dev->regmap, 0x102, 0x40);
+		ret = regmap_write(dev->regmap, 0x115, 0x01);
+		ret = regmap_write(dev->regmap, 0x103, 0x80);
+		ret = regmap_write(dev->regmap, 0x1c7, 0x24);
+		ret = regmap_write(dev->regmap, 0x104, 0xcc);
+		ret = regmap_write(dev->regmap, 0x105, 0xbe);
+		ret = regmap_write(dev->regmap, 0x1c8, 0x14);
+		ret = regmap_write(dev->regmap, 0x106, 0x35);
+		ret = regmap_write(dev->regmap, 0x1c9, 0x21);
+		ret = regmap_write(dev->regmap, 0x1ca, 0x21);
+		ret = regmap_write(dev->regmap, 0x1cb, 0x00);
+		ret = regmap_write(dev->regmap, 0x107, 0x40);
+		ret = regmap_write(dev->regmap, 0x1cd, 0x10);
+		ret = regmap_write(dev->regmap, 0x1ce, 0x10);
+		ret = regmap_write(dev->regmap, 0x108, 0x80);
+		ret = regmap_write(dev->regmap, 0x109, 0x7f);
+		ret = regmap_write(dev->regmap, 0x10a, 0x80);
+		ret = regmap_write(dev->regmap, 0x10b, 0x7f);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_write(dev->regmap, 0x011, 0xf4);
 		break;
 	case RTL2832_SDR_TUNER_FC2580:
-		ret = rtl2832_sdr_wr_regs(dev, 0x112, "\x39", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x102, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x103, "\x5a", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c7, "\x2c", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x104, "\xcc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x105, "\xbe", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c8, "\x16", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x106, "\x35", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1c9, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ca, "\x21", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cb, "\x00", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x107, "\x40", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1cd, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x1ce, "\x10", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x108, "\x80", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x109, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10a, "\x9c", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x10b, "\x7f", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x00e, "\xfc", 1);
-		ret = rtl2832_sdr_wr_regs(dev, 0x011, "\xe9\xf4", 2);
+		ret = regmap_write(dev->regmap, 0x112, 0x39);
+		ret = regmap_write(dev->regmap, 0x102, 0x40);
+		ret = regmap_write(dev->regmap, 0x103, 0x5a);
+		ret = regmap_write(dev->regmap, 0x1c7, 0x2c);
+		ret = regmap_write(dev->regmap, 0x104, 0xcc);
+		ret = regmap_write(dev->regmap, 0x105, 0xbe);
+		ret = regmap_write(dev->regmap, 0x1c8, 0x16);
+		ret = regmap_write(dev->regmap, 0x106, 0x35);
+		ret = regmap_write(dev->regmap, 0x1c9, 0x21);
+		ret = regmap_write(dev->regmap, 0x1ca, 0x21);
+		ret = regmap_write(dev->regmap, 0x1cb, 0x00);
+		ret = regmap_write(dev->regmap, 0x107, 0x40);
+		ret = regmap_write(dev->regmap, 0x1cd, 0x10);
+		ret = regmap_write(dev->regmap, 0x1ce, 0x10);
+		ret = regmap_write(dev->regmap, 0x108, 0x80);
+		ret = regmap_write(dev->regmap, 0x109, 0x7f);
+		ret = regmap_write(dev->regmap, 0x10a, 0x9c);
+		ret = regmap_write(dev->regmap, 0x10b, 0x7f);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_write(dev->regmap, 0x00e, 0xfc);
+		ret = regmap_bulk_write(dev->regmap, 0x011, "\xe9\xf4", 2);
 		break;
 	default:
 		dev_notice(&pdev->dev, "Unsupported tuner\n");
 	}
 
 	/* software reset */
-	ret = rtl2832_sdr_wr_reg_mask(dev, 0x101, 0x04, 0x04);
+	ret = regmap_update_bits(dev->regmap, 0x101, 0x04, 0x04);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_reg_mask(dev, 0x101, 0x00, 0x04);
+	ret = regmap_update_bits(dev->regmap, 0x101, 0x04, 0x00);
 	if (ret)
 		goto err;
 err:
@@ -797,29 +759,29 @@
 	dev_dbg(&pdev->dev, "\n");
 
 	/* PID filter */
-	ret = rtl2832_sdr_wr_regs(dev, 0x061, "\xe0", 1);
+	ret = regmap_write(dev->regmap, 0x061, 0xe0);
 	if (ret)
 		goto err;
 
 	/* mode */
-	ret = rtl2832_sdr_wr_regs(dev, 0x019, "\x20", 1);
+	ret = regmap_write(dev->regmap, 0x019, 0x20);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x017, "\x11\x10", 2);
+	ret = regmap_bulk_write(dev->regmap, 0x017, "\x11\x10", 2);
 	if (ret)
 		goto err;
 
 	/* FSM */
-	ret = rtl2832_sdr_wr_regs(dev, 0x192, "\x00\x0f\xff", 3);
+	ret = regmap_bulk_write(dev->regmap, 0x192, "\x00\x0f\xff", 3);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x13e, "\x40\x00", 2);
+	ret = regmap_bulk_write(dev->regmap, 0x13e, "\x40\x00", 2);
 	if (ret)
 		goto err;
 
-	ret = rtl2832_sdr_wr_regs(dev, 0x115, "\x06\x3f\xce\xcc", 4);
+	ret = regmap_bulk_write(dev->regmap, 0x115, "\x06\x3f\xce\xcc", 4);
 	if (ret)
 		goto err;
 err:
@@ -1399,6 +1361,7 @@
 	subdev = pdata->v4l2_subdev;
 	dev->v4l2_subdev = pdata->v4l2_subdev;
 	dev->pdev = pdev;
+	dev->regmap = pdata->regmap;
 	dev->udev = pdata->dvb_usb_device->udev;
 	dev->f_adc = bands_adc[0].rangelow;
 	dev->f_tuner = bands_fm[0].rangelow;
diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.h b/drivers/media/dvb-frontends/rtl2832_sdr.h
index 342ea84..d8fc7e7 100644
--- a/drivers/media/dvb-frontends/rtl2832_sdr.h
+++ b/drivers/media/dvb-frontends/rtl2832_sdr.h
@@ -56,10 +56,7 @@
 #define RTL2832_SDR_TUNER_R828D     0x2b
 	u8 tuner;
 
-	struct i2c_client *i2c_client;
-	int (*bulk_read)(struct i2c_client *, unsigned int, void *, size_t);
-	int (*bulk_write)(struct i2c_client *, unsigned int, const void *, size_t);
-	int (*update_bits)(struct i2c_client *, unsigned int, unsigned int, unsigned int);
+	struct regmap *regmap;
 	struct dvb_frontend *dvb_frontend;
 	struct v4l2_subdev *v4l2_subdev;
 	struct dvb_usb_device *dvb_usb_device;
diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c
index 821a8f4..108a069 100644
--- a/drivers/media/dvb-frontends/si2168.c
+++ b/drivers/media/dvb-frontends/si2168.c
@@ -18,53 +18,23 @@
 
 static const struct dvb_frontend_ops si2168_ops;
 
-/* Own I2C adapter locking is needed because of I2C gate logic. */
-static int si2168_i2c_master_send_unlocked(const struct i2c_client *client,
-					   const char *buf, int count)
-{
-	int ret;
-	struct i2c_msg msg = {
-		.addr = client->addr,
-		.flags = 0,
-		.len = count,
-		.buf = (char *)buf,
-	};
-
-	ret = __i2c_transfer(client->adapter, &msg, 1);
-	return (ret == 1) ? count : ret;
-}
-
-static int si2168_i2c_master_recv_unlocked(const struct i2c_client *client,
-					   char *buf, int count)
-{
-	int ret;
-	struct i2c_msg msg = {
-		.addr = client->addr,
-		.flags = I2C_M_RD,
-		.len = count,
-		.buf = buf,
-	};
-
-	ret = __i2c_transfer(client->adapter, &msg, 1);
-	return (ret == 1) ? count : ret;
-}
-
 /* execute firmware command */
-static int si2168_cmd_execute_unlocked(struct i2c_client *client,
-				       struct si2168_cmd *cmd)
+static int si2168_cmd_execute(struct i2c_client *client, struct si2168_cmd *cmd)
 {
+	struct si2168_dev *dev = i2c_get_clientdata(client);
 	int ret;
 	unsigned long timeout;
 
+	mutex_lock(&dev->i2c_mutex);
+
 	if (cmd->wlen) {
 		/* write cmd and args for firmware */
-		ret = si2168_i2c_master_send_unlocked(client, cmd->args,
-						      cmd->wlen);
+		ret = i2c_master_send(client, cmd->args, cmd->wlen);
 		if (ret < 0) {
-			goto err;
+			goto err_mutex_unlock;
 		} else if (ret != cmd->wlen) {
 			ret = -EREMOTEIO;
-			goto err;
+			goto err_mutex_unlock;
 		}
 	}
 
@@ -73,13 +43,12 @@
 		#define TIMEOUT 70
 		timeout = jiffies + msecs_to_jiffies(TIMEOUT);
 		while (!time_after(jiffies, timeout)) {
-			ret = si2168_i2c_master_recv_unlocked(client, cmd->args,
-							      cmd->rlen);
+			ret = i2c_master_recv(client, cmd->args, cmd->rlen);
 			if (ret < 0) {
-				goto err;
+				goto err_mutex_unlock;
 			} else if (ret != cmd->rlen) {
 				ret = -EREMOTEIO;
-				goto err;
+				goto err_mutex_unlock;
 			}
 
 			/* firmware ready? */
@@ -94,32 +63,23 @@
 		/* error bit set? */
 		if ((cmd->args[0] >> 6) & 0x01) {
 			ret = -EREMOTEIO;
-			goto err;
+			goto err_mutex_unlock;
 		}
 
 		if (!((cmd->args[0] >> 7) & 0x01)) {
 			ret = -ETIMEDOUT;
-			goto err;
+			goto err_mutex_unlock;
 		}
 	}
 
+	mutex_unlock(&dev->i2c_mutex);
 	return 0;
-err:
+err_mutex_unlock:
+	mutex_unlock(&dev->i2c_mutex);
 	dev_dbg(&client->dev, "failed=%d\n", ret);
 	return ret;
 }
 
-static int si2168_cmd_execute(struct i2c_client *client, struct si2168_cmd *cmd)
-{
-	int ret;
-
-	i2c_lock_adapter(client->adapter);
-	ret = si2168_cmd_execute_unlocked(client, cmd);
-	i2c_unlock_adapter(client->adapter);
-
-	return ret;
-}
-
 static int si2168_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct i2c_client *client = fe->demodulator_priv;
@@ -610,14 +570,9 @@
 	return 0;
 }
 
-/*
- * I2C gate logic
- * We must use unlocked I2C I/O because I2C adapter lock is already taken
- * by the caller (usually tuner driver).
- */
-static int si2168_select(struct i2c_adapter *adap, void *mux_priv, u32 chan)
+static int si2168_select(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct i2c_client *client = mux_priv;
+	struct i2c_client *client = i2c_mux_priv(muxc);
 	int ret;
 	struct si2168_cmd cmd;
 
@@ -625,7 +580,7 @@
 	memcpy(cmd.args, "\xc0\x0d\x01", 3);
 	cmd.wlen = 3;
 	cmd.rlen = 0;
-	ret = si2168_cmd_execute_unlocked(client, &cmd);
+	ret = si2168_cmd_execute(client, &cmd);
 	if (ret)
 		goto err;
 
@@ -635,9 +590,9 @@
 	return ret;
 }
 
-static int si2168_deselect(struct i2c_adapter *adap, void *mux_priv, u32 chan)
+static int si2168_deselect(struct i2c_mux_core *muxc, u32 chan)
 {
-	struct i2c_client *client = mux_priv;
+	struct i2c_client *client = i2c_mux_priv(muxc);
 	int ret;
 	struct si2168_cmd cmd;
 
@@ -645,7 +600,7 @@
 	memcpy(cmd.args, "\xc0\x0d\x00", 3);
 	cmd.wlen = 3;
 	cmd.rlen = 0;
-	ret = si2168_cmd_execute_unlocked(client, &cmd);
+	ret = si2168_cmd_execute(client, &cmd);
 	if (ret)
 		goto err;
 
@@ -708,18 +663,25 @@
 		goto err;
 	}
 
+	mutex_init(&dev->i2c_mutex);
+
 	/* create mux i2c adapter for tuner */
-	dev->adapter = i2c_add_mux_adapter(client->adapter, &client->dev,
-			client, 0, 0, 0, si2168_select, si2168_deselect);
-	if (dev->adapter == NULL) {
-		ret = -ENODEV;
+	dev->muxc = i2c_mux_alloc(client->adapter, &client->dev,
+				  1, 0, I2C_MUX_LOCKED,
+				  si2168_select, si2168_deselect);
+	if (!dev->muxc) {
+		ret = -ENOMEM;
 		goto err_kfree;
 	}
+	dev->muxc->priv = client;
+	ret = i2c_mux_add_adapter(dev->muxc, 0, 0, 0);
+	if (ret)
+		goto err_kfree;
 
 	/* create dvb_frontend */
 	memcpy(&dev->fe.ops, &si2168_ops, sizeof(struct dvb_frontend_ops));
 	dev->fe.demodulator_priv = client;
-	*config->i2c_adapter = dev->adapter;
+	*config->i2c_adapter = dev->muxc->adapter[0];
 	*config->fe = &dev->fe;
 	dev->ts_mode = config->ts_mode;
 	dev->ts_clock_inv = config->ts_clock_inv;
@@ -743,7 +705,7 @@
 
 	dev_dbg(&client->dev, "\n");
 
-	i2c_del_mux_adapter(dev->adapter);
+	i2c_mux_del_adapters(dev->muxc);
 
 	dev->fe.ops.release = NULL;
 	dev->fe.demodulator_priv = NULL;
diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h
index c07e6fe..8a1f36d 100644
--- a/drivers/media/dvb-frontends/si2168_priv.h
+++ b/drivers/media/dvb-frontends/si2168_priv.h
@@ -29,7 +29,8 @@
 
 /* state struct */
 struct si2168_dev {
-	struct i2c_adapter *adapter;
+	struct mutex i2c_mutex;
+	struct i2c_mux_core *muxc;
 	struct dvb_frontend fe;
 	enum fe_delivery_system delivery_system;
 	enum fe_status fe_status;
diff --git a/drivers/media/usb/cx231xx/cx231xx-core.c b/drivers/media/usb/cx231xx/cx231xx-core.c
index 6741fd0..630f4fc 100644
--- a/drivers/media/usb/cx231xx/cx231xx-core.c
+++ b/drivers/media/usb/cx231xx/cx231xx-core.c
@@ -1305,6 +1305,9 @@
 	cx231xx_i2c_register(&dev->i2c_bus[1]);
 	cx231xx_i2c_register(&dev->i2c_bus[2]);
 
+	errCode = cx231xx_i2c_mux_create(dev);
+	if (errCode < 0)
+		return errCode;
 	cx231xx_i2c_mux_register(dev, 0);
 	cx231xx_i2c_mux_register(dev, 1);
 
@@ -1427,8 +1430,7 @@
 void cx231xx_dev_uninit(struct cx231xx *dev)
 {
 	/* Un Initialize I2C bus */
-	cx231xx_i2c_mux_unregister(dev, 1);
-	cx231xx_i2c_mux_unregister(dev, 0);
+	cx231xx_i2c_mux_unregister(dev);
 	cx231xx_i2c_unregister(&dev->i2c_bus[2]);
 	cx231xx_i2c_unregister(&dev->i2c_bus[1]);
 	cx231xx_i2c_unregister(&dev->i2c_bus[0]);
diff --git a/drivers/media/usb/cx231xx/cx231xx-i2c.c b/drivers/media/usb/cx231xx/cx231xx-i2c.c
index a29c345..473cd34 100644
--- a/drivers/media/usb/cx231xx/cx231xx-i2c.c
+++ b/drivers/media/usb/cx231xx/cx231xx-i2c.c
@@ -557,40 +557,41 @@
  * cx231xx_i2c_mux_select()
  * switch i2c master number 1 between port1 and port3
  */
-static int cx231xx_i2c_mux_select(struct i2c_adapter *adap,
-			void *mux_priv, u32 chan_id)
+static int cx231xx_i2c_mux_select(struct i2c_mux_core *muxc, u32 chan_id)
 {
-	struct cx231xx *dev = mux_priv;
+	struct cx231xx *dev = i2c_mux_priv(muxc);
 
 	return cx231xx_enable_i2c_port_3(dev, chan_id);
 }
 
-int cx231xx_i2c_mux_register(struct cx231xx *dev, int mux_no)
+int cx231xx_i2c_mux_create(struct cx231xx *dev)
 {
-	struct i2c_adapter *i2c_parent = &dev->i2c_bus[1].i2c_adap;
-	/* what is the correct mux_dev? */
-	struct device *mux_dev = dev->dev;
-
-	dev->i2c_mux_adap[mux_no] = i2c_add_mux_adapter(i2c_parent,
-				mux_dev,
-				dev /* mux_priv */,
-				0,
-				mux_no /* chan_id */,
-				0 /* class */,
-				&cx231xx_i2c_mux_select,
-				NULL);
-
-	if (!dev->i2c_mux_adap[mux_no])
-		dev_warn(dev->dev,
-			 "i2c mux %d register FAILED\n", mux_no);
-
+	dev->muxc = i2c_mux_alloc(&dev->i2c_bus[1].i2c_adap, dev->dev, 2, 0, 0,
+				  cx231xx_i2c_mux_select, NULL);
+	if (!dev->muxc)
+		return -ENOMEM;
+	dev->muxc->priv = dev;
 	return 0;
 }
 
-void cx231xx_i2c_mux_unregister(struct cx231xx *dev, int mux_no)
+int cx231xx_i2c_mux_register(struct cx231xx *dev, int mux_no)
 {
-	i2c_del_mux_adapter(dev->i2c_mux_adap[mux_no]);
-	dev->i2c_mux_adap[mux_no] = NULL;
+	int rc;
+
+	rc = i2c_mux_add_adapter(dev->muxc,
+				 0,
+				 mux_no /* chan_id */,
+				 0 /* class */);
+	if (rc)
+		dev_warn(dev->dev,
+			 "i2c mux %d register FAILED\n", mux_no);
+
+	return rc;
+}
+
+void cx231xx_i2c_mux_unregister(struct cx231xx *dev)
+{
+	i2c_mux_del_adapters(dev->muxc);
 }
 
 struct i2c_adapter *cx231xx_get_i2c_adap(struct cx231xx *dev, int i2c_port)
@@ -603,9 +604,9 @@
 	case I2C_2:
 		return &dev->i2c_bus[2].i2c_adap;
 	case I2C_1_MUX_1:
-		return dev->i2c_mux_adap[0];
+		return dev->muxc->adapter[0];
 	case I2C_1_MUX_3:
-		return dev->i2c_mux_adap[1];
+		return dev->muxc->adapter[1];
 	default:
 		return NULL;
 	}
diff --git a/drivers/media/usb/cx231xx/cx231xx.h b/drivers/media/usb/cx231xx/cx231xx.h
index 69f6d20..90c8676 100644
--- a/drivers/media/usb/cx231xx/cx231xx.h
+++ b/drivers/media/usb/cx231xx/cx231xx.h
@@ -624,6 +624,7 @@
 
 	/* I2C adapters: Master 1 & 2 (External) & Master 3 (Internal only) */
 	struct cx231xx_i2c i2c_bus[3];
+	struct i2c_mux_core *muxc;
 	struct i2c_adapter *i2c_mux_adap[2];
 
 	unsigned int xc_fw_load_done:1;
@@ -760,8 +761,9 @@
 void cx231xx_do_i2c_scan(struct cx231xx *dev, int i2c_port);
 int cx231xx_i2c_register(struct cx231xx_i2c *bus);
 int cx231xx_i2c_unregister(struct cx231xx_i2c *bus);
+int cx231xx_i2c_mux_create(struct cx231xx *dev);
 int cx231xx_i2c_mux_register(struct cx231xx *dev, int mux_no);
-void cx231xx_i2c_mux_unregister(struct cx231xx *dev, int mux_no);
+void cx231xx_i2c_mux_unregister(struct cx231xx *dev);
 struct i2c_adapter *cx231xx_get_i2c_adap(struct cx231xx *dev, int i2c_port);
 
 /* Internal block control functions */
diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
index fa72642..eb7af8c 100644
--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
@@ -1333,10 +1333,7 @@
 	case TUNER_RTL2832_R828D:
 		pdata.clk = dev->rtl2832_platform_data.clk;
 		pdata.tuner = dev->tuner;
-		pdata.i2c_client = dev->i2c_client_demod;
-		pdata.bulk_read = dev->rtl2832_platform_data.bulk_read;
-		pdata.bulk_write = dev->rtl2832_platform_data.bulk_write;
-		pdata.update_bits = dev->rtl2832_platform_data.update_bits;
+		pdata.regmap = dev->rtl2832_platform_data.regmap;
 		pdata.dvb_frontend = adap->fe[0];
 		pdata.dvb_usb_device = d;
 		pdata.v4l2_subdev = subdev;
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 089d694..6cc17b77 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -245,8 +245,7 @@
 		if (status == count)
 			return count;
 
-		/* REVISIT: at HZ=100, this is sloooow */
-		msleep(1);
+		usleep_range(1000, 1500);
 	} while (time_before(read_time, timeout));
 
 	return -ETIMEDOUT;
@@ -365,8 +364,7 @@
 		if (status == count)
 			return count;
 
-		/* REVISIT: at HZ=100, this is sloooow */
-		msleep(1);
+		usleep_range(1000, 1500);
 	} while (time_before(write_time, timeout));
 
 	return -ETIMEDOUT;
@@ -544,10 +542,7 @@
 		} else {
 			return -EPFNOSUPPORT;
 		}
-	}
 
-	/* Use I2C operations unless we're stuck with SMBus extensions. */
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		if (i2c_check_functionality(client->adapter,
 				I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
 			use_smbus_write = I2C_SMBUS_I2C_BLOCK_DATA;
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 91a469d..0a553c0 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -4,6 +4,7 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
+#include <linux/pci.h>
 #include <linux/pci_regs.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
@@ -673,121 +674,6 @@
 }
 EXPORT_SYMBOL(of_get_address);
 
-#ifdef PCI_IOBASE
-struct io_range {
-	struct list_head list;
-	phys_addr_t start;
-	resource_size_t size;
-};
-
-static LIST_HEAD(io_range_list);
-static DEFINE_SPINLOCK(io_range_lock);
-#endif
-
-/*
- * Record the PCI IO range (expressed as CPU physical address + size).
- * Return a negative value if an error has occured, zero otherwise
- */
-int __weak pci_register_io_range(phys_addr_t addr, resource_size_t size)
-{
-	int err = 0;
-
-#ifdef PCI_IOBASE
-	struct io_range *range;
-	resource_size_t allocated_size = 0;
-
-	/* check if the range hasn't been previously recorded */
-	spin_lock(&io_range_lock);
-	list_for_each_entry(range, &io_range_list, list) {
-		if (addr >= range->start && addr + size <= range->start + size) {
-			/* range already registered, bail out */
-			goto end_register;
-		}
-		allocated_size += range->size;
-	}
-
-	/* range not registed yet, check for available space */
-	if (allocated_size + size - 1 > IO_SPACE_LIMIT) {
-		/* if it's too big check if 64K space can be reserved */
-		if (allocated_size + SZ_64K - 1 > IO_SPACE_LIMIT) {
-			err = -E2BIG;
-			goto end_register;
-		}
-
-		size = SZ_64K;
-		pr_warn("Requested IO range too big, new size set to 64K\n");
-	}
-
-	/* add the range to the list */
-	range = kzalloc(sizeof(*range), GFP_ATOMIC);
-	if (!range) {
-		err = -ENOMEM;
-		goto end_register;
-	}
-
-	range->start = addr;
-	range->size = size;
-
-	list_add_tail(&range->list, &io_range_list);
-
-end_register:
-	spin_unlock(&io_range_lock);
-#endif
-
-	return err;
-}
-
-phys_addr_t pci_pio_to_address(unsigned long pio)
-{
-	phys_addr_t address = (phys_addr_t)OF_BAD_ADDR;
-
-#ifdef PCI_IOBASE
-	struct io_range *range;
-	resource_size_t allocated_size = 0;
-
-	if (pio > IO_SPACE_LIMIT)
-		return address;
-
-	spin_lock(&io_range_lock);
-	list_for_each_entry(range, &io_range_list, list) {
-		if (pio >= allocated_size && pio < allocated_size + range->size) {
-			address = range->start + pio - allocated_size;
-			break;
-		}
-		allocated_size += range->size;
-	}
-	spin_unlock(&io_range_lock);
-#endif
-
-	return address;
-}
-
-unsigned long __weak pci_address_to_pio(phys_addr_t address)
-{
-#ifdef PCI_IOBASE
-	struct io_range *res;
-	resource_size_t offset = 0;
-	unsigned long addr = -1;
-
-	spin_lock(&io_range_lock);
-	list_for_each_entry(res, &io_range_list, list) {
-		if (address >= res->start && address < res->start + res->size) {
-			addr = address - res->start + offset;
-			break;
-		}
-		offset += res->size;
-	}
-	spin_unlock(&io_range_lock);
-
-	return addr;
-#else
-	if (address > IO_SPACE_LIMIT)
-		return (unsigned long)-1;
-
-	return (unsigned long) address;
-#endif
-}
-
 static int __of_address_to_resource(struct device_node *dev,
 		const __be32 *addrp, u64 size, unsigned int flags,
 		const char *name, struct resource *r)
diff --git a/drivers/of/device.c b/drivers/of/device.c
index e5f47ce..fd5cfad 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -88,7 +88,7 @@
 	int ret;
 	bool coherent;
 	unsigned long offset;
-	struct iommu_ops *iommu;
+	const struct iommu_ops *iommu;
 
 	/*
 	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index e986e6e..c1ebbfb 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -1692,13 +1692,7 @@
 
 #if IS_BUILTIN(CONFIG_I2C_MUX)
 
-struct unittest_i2c_mux_data {
-	int nchans;
-	struct i2c_adapter *adap[];
-};
-
-static int unittest_i2c_mux_select_chan(struct i2c_adapter *adap,
-			       void *client, u32 chan)
+static int unittest_i2c_mux_select_chan(struct i2c_mux_core *muxc, u32 chan)
 {
 	return 0;
 }
@@ -1706,11 +1700,11 @@
 static int unittest_i2c_mux_probe(struct i2c_client *client,
 		const struct i2c_device_id *id)
 {
-	int ret, i, nchans, size;
+	int ret, i, nchans;
 	struct device *dev = &client->dev;
 	struct i2c_adapter *adap = to_i2c_adapter(dev->parent);
 	struct device_node *np = client->dev.of_node, *child;
-	struct unittest_i2c_mux_data *stm;
+	struct i2c_mux_core *muxc;
 	u32 reg, max_reg;
 
 	dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name);
@@ -1734,25 +1728,20 @@
 		return -EINVAL;
 	}
 
-	size = offsetof(struct unittest_i2c_mux_data, adap[nchans]);
-	stm = devm_kzalloc(dev, size, GFP_KERNEL);
-	if (!stm) {
-		dev_err(dev, "Out of memory\n");
+	muxc = i2c_mux_alloc(adap, dev, nchans, 0, 0,
+			     unittest_i2c_mux_select_chan, NULL);
+	if (!muxc)
 		return -ENOMEM;
-	}
-	stm->nchans = nchans;
 	for (i = 0; i < nchans; i++) {
-		stm->adap[i] = i2c_add_mux_adapter(adap, dev, client,
-				0, i, 0, unittest_i2c_mux_select_chan, NULL);
-		if (!stm->adap[i]) {
+		ret = i2c_mux_add_adapter(muxc, 0, i, 0);
+		if (ret) {
 			dev_err(dev, "Failed to register mux #%d\n", i);
-			for (i--; i >= 0; i--)
-				i2c_del_mux_adapter(stm->adap[i]);
+			i2c_mux_del_adapters(muxc);
 			return -ENODEV;
 		}
 	}
 
-	i2c_set_clientdata(client, stm);
+	i2c_set_clientdata(client, muxc);
 
 	return 0;
 };
@@ -1761,12 +1750,10 @@
 {
 	struct device *dev = &client->dev;
 	struct device_node *np = client->dev.of_node;
-	struct unittest_i2c_mux_data *stm = i2c_get_clientdata(client);
-	int i;
+	struct i2c_mux_core *muxc = i2c_get_clientdata(client);
 
 	dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name);
-	for (i = stm->nchans - 1; i >= 0; i--)
-		i2c_del_mux_adapter(stm->adap[i]);
+	i2c_mux_del_adapters(muxc);
 	return 0;
 }
 
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 209292e..56389be 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -83,6 +83,9 @@
 config PCI_ATS
 	bool
 
+config PCI_ECAM
+	bool
+
 config PCI_IOV
 	bool "PCI IOV support"
 	depends on PCI
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 2154092..1fa6925 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -55,6 +55,8 @@
 
 obj-$(CONFIG_PCI_STUB) += pci-stub.o
 
+obj-$(CONFIG_PCI_ECAM) += ecam.o
+
 obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
 
 obj-$(CONFIG_OF) += of.o
diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c
new file mode 100644
index 0000000..f9832ad
--- /dev/null
+++ b/drivers/pci/ecam.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include "ecam.h"
+
+/*
+ * On 64-bit systems, we do a single ioremap for the whole config space
+ * since we have enough virtual address range available.  On 32-bit, we
+ * ioremap the config space for each bus individually.
+ */
+static const bool per_bus_mapping = !config_enabled(CONFIG_64BIT);
+
+/*
+ * Create a PCI config space window
+ *  - reserve mem region
+ *  - alloc struct pci_config_window with space for all mappings
+ *  - ioremap the config space
+ */
+struct pci_config_window *pci_ecam_create(struct device *dev,
+		struct resource *cfgres, struct resource *busr,
+		struct pci_ecam_ops *ops)
+{
+	struct pci_config_window *cfg;
+	unsigned int bus_range, bus_range_max, bsz;
+	struct resource *conflict;
+	int i, err;
+
+	if (busr->start > busr->end)
+		return ERR_PTR(-EINVAL);
+
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (!cfg)
+		return ERR_PTR(-ENOMEM);
+
+	cfg->ops = ops;
+	cfg->busr.start = busr->start;
+	cfg->busr.end = busr->end;
+	cfg->busr.flags = IORESOURCE_BUS;
+	bus_range = resource_size(&cfg->busr);
+	bus_range_max = resource_size(cfgres) >> ops->bus_shift;
+	if (bus_range > bus_range_max) {
+		bus_range = bus_range_max;
+		cfg->busr.end = busr->start + bus_range - 1;
+		dev_warn(dev, "ECAM area %pR can only accommodate %pR (reduced from %pR desired)\n",
+			 cfgres, &cfg->busr, busr);
+	}
+	bsz = 1 << ops->bus_shift;
+
+	cfg->res.start = cfgres->start;
+	cfg->res.end = cfgres->end;
+	cfg->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	cfg->res.name = "PCI ECAM";
+
+	conflict = request_resource_conflict(&iomem_resource, &cfg->res);
+	if (conflict) {
+		err = -EBUSY;
+		dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n",
+			&cfg->res, conflict->name, conflict);
+		goto err_exit;
+	}
+
+	if (per_bus_mapping) {
+		cfg->winp = kcalloc(bus_range, sizeof(*cfg->winp), GFP_KERNEL);
+		if (!cfg->winp)
+			goto err_exit_malloc;
+		for (i = 0; i < bus_range; i++) {
+			cfg->winp[i] = ioremap(cfgres->start + i * bsz, bsz);
+			if (!cfg->winp[i])
+				goto err_exit_iomap;
+		}
+	} else {
+		cfg->win = ioremap(cfgres->start, bus_range * bsz);
+		if (!cfg->win)
+			goto err_exit_iomap;
+	}
+
+	if (ops->init) {
+		err = ops->init(dev, cfg);
+		if (err)
+			goto err_exit;
+	}
+	dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr);
+	return cfg;
+
+err_exit_iomap:
+	dev_err(dev, "ECAM ioremap failed\n");
+err_exit_malloc:
+	err = -ENOMEM;
+err_exit:
+	pci_ecam_free(cfg);
+	return ERR_PTR(err);
+}
+
+void pci_ecam_free(struct pci_config_window *cfg)
+{
+	int i;
+
+	if (per_bus_mapping) {
+		if (cfg->winp) {
+			for (i = 0; i < resource_size(&cfg->busr); i++)
+				if (cfg->winp[i])
+					iounmap(cfg->winp[i]);
+			kfree(cfg->winp);
+		}
+	} else {
+		if (cfg->win)
+			iounmap(cfg->win);
+	}
+	if (cfg->res.parent)
+		release_resource(&cfg->res);
+	kfree(cfg);
+}
+
+/*
+ * Function to implement the pci_ops ->map_bus method
+ */
+void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
+			       int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	unsigned int devfn_shift = cfg->ops->bus_shift - 8;
+	unsigned int busn = bus->number;
+	void __iomem *base;
+
+	if (busn < cfg->busr.start || busn > cfg->busr.end)
+		return NULL;
+
+	busn -= cfg->busr.start;
+	if (per_bus_mapping)
+		base = cfg->winp[busn];
+	else
+		base = cfg->win + (busn << cfg->ops->bus_shift);
+	return base + (devfn << devfn_shift) + where;
+}
+
+/* ECAM ops */
+struct pci_ecam_ops pci_generic_ecam_ops = {
+	.bus_shift	= 20,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write,
+	}
+};
diff --git a/drivers/pci/ecam.h b/drivers/pci/ecam.h
new file mode 100644
index 0000000..9878beb
--- /dev/null
+++ b/drivers/pci/ecam.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+#ifndef DRIVERS_PCI_ECAM_H
+#define DRIVERS_PCI_ECAM_H
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+/*
+ * struct to hold pci ops and bus shift of the config window
+ * for a PCI controller.
+ */
+struct pci_config_window;
+struct pci_ecam_ops {
+	unsigned int			bus_shift;
+	struct pci_ops			pci_ops;
+	int				(*init)(struct device *,
+						struct pci_config_window *);
+};
+
+/*
+ * struct to hold the mappings of a config space window. This
+ * is expected to be used as sysdata for PCI controllers that
+ * use ECAM.
+ */
+struct pci_config_window {
+	struct resource			res;
+	struct resource			busr;
+	void				*priv;
+	struct pci_ecam_ops		*ops;
+	union {
+		void __iomem		*win;	/* 64-bit single mapping */
+		void __iomem		**winp; /* 32-bit per-bus mapping */
+	};
+};
+
+/* create and free pci_config_window */
+struct pci_config_window *pci_ecam_create(struct device *dev,
+		struct resource *cfgres, struct resource *busr,
+		struct pci_ecam_ops *ops);
+void pci_ecam_free(struct pci_config_window *cfg);
+
+/* map_bus when ->sysdata is an instance of pci_config_window */
+void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
+			       int where);
+/* default ECAM ops */
+extern struct pci_ecam_ops pci_generic_ecam_ops;
+
+#ifdef CONFIG_PCI_HOST_GENERIC
+/* for DT-based PCI controllers that support ECAM */
+int pci_host_common_probe(struct platform_device *pdev,
+			  struct pci_ecam_ops *ops);
+#endif
+#endif
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 8fb1cf5..5d2374e 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -72,11 +72,14 @@
 config PCIE_RCAR
 	bool "Renesas R-Car PCIe controller"
 	depends on ARCH_RENESAS || (ARM && COMPILE_TEST)
+	select PCI_MSI
+	select PCI_MSI_IRQ_DOMAIN
 	help
 	  Say Y here if you want PCIe controller support on R-Car SoCs.
 
 config PCI_HOST_COMMON
 	bool
+	select PCI_ECAM
 
 config PCI_HOST_GENERIC
 	bool "Generic PCI host controller"
@@ -231,4 +234,15 @@
 	help
 	  Say Y here if you want ECAM support for CN88XX-Pass-1.x Cavium Thunder SoCs.
 
+config PCIE_ARMADA_8K
+	bool "Marvell Armada-8K PCIe controller"
+	depends on ARCH_MVEBU
+	select PCIE_DW
+	select PCIEPORTBUS
+	help
+	  Say Y here if you want to enable PCIe controller support on
+	  Armada-8K SoCs. The PCIe controller on Armada-8K is based on
+	  Designware hardware and therefore the driver re-uses the
+	  Designware core functions to implement the driver.
+
 endmenu
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index d3d8e1b..9c8698e 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -28,3 +28,4 @@
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
 obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o
 obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o
+obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c
index 2ca3a1f..f441130 100644
--- a/drivers/pci/host/pci-dra7xx.c
+++ b/drivers/pci/host/pci-dra7xx.c
@@ -142,13 +142,13 @@
 
 static void dra7xx_pcie_host_init(struct pcie_port *pp)
 {
-	dw_pcie_setup_rc(pp);
-
 	pp->io_base &= DRA7XX_CPU_TO_BUS_ADDR;
 	pp->mem_base &= DRA7XX_CPU_TO_BUS_ADDR;
 	pp->cfg0_base &= DRA7XX_CPU_TO_BUS_ADDR;
 	pp->cfg1_base &= DRA7XX_CPU_TO_BUS_ADDR;
 
+	dw_pcie_setup_rc(pp);
+
 	dra7xx_pcie_establish_link(pp);
 	if (IS_ENABLED(CONFIG_PCI_MSI))
 		dw_pcie_msi_init(pp);
diff --git a/drivers/pci/host/pci-host-common.c b/drivers/pci/host/pci-host-common.c
index e9f850f..8cba7ab 100644
--- a/drivers/pci/host/pci-host-common.c
+++ b/drivers/pci/host/pci-host-common.c
@@ -22,27 +22,21 @@
 #include <linux/of_pci.h>
 #include <linux/platform_device.h>
 
-#include "pci-host-common.h"
+#include "../ecam.h"
 
-static void gen_pci_release_of_pci_ranges(struct gen_pci *pci)
-{
-	pci_free_resource_list(&pci->resources);
-}
-
-static int gen_pci_parse_request_of_pci_ranges(struct gen_pci *pci)
+static int gen_pci_parse_request_of_pci_ranges(struct device *dev,
+		       struct list_head *resources, struct resource **bus_range)
 {
 	int err, res_valid = 0;
-	struct device *dev = pci->host.dev.parent;
 	struct device_node *np = dev->of_node;
 	resource_size_t iobase;
 	struct resource_entry *win;
 
-	err = of_pci_get_host_bridge_resources(np, 0, 0xff, &pci->resources,
-					       &iobase);
+	err = of_pci_get_host_bridge_resources(np, 0, 0xff, resources, &iobase);
 	if (err)
 		return err;
 
-	resource_list_for_each_entry(win, &pci->resources) {
+	resource_list_for_each_entry(win, resources) {
 		struct resource *parent, *res = win->res;
 
 		switch (resource_type(res)) {
@@ -60,7 +54,7 @@
 			res_valid |= !(res->flags & IORESOURCE_PREFETCH);
 			break;
 		case IORESOURCE_BUS:
-			pci->cfg.bus_range = res;
+			*bus_range = res;
 		default:
 			continue;
 		}
@@ -79,65 +73,60 @@
 	return 0;
 
 out_release_res:
-	gen_pci_release_of_pci_ranges(pci);
 	return err;
 }
 
-static int gen_pci_parse_map_cfg_windows(struct gen_pci *pci)
+static void gen_pci_unmap_cfg(void *ptr)
+{
+	pci_ecam_free((struct pci_config_window *)ptr);
+}
+
+static struct pci_config_window *gen_pci_init(struct device *dev,
+		struct list_head *resources, struct pci_ecam_ops *ops)
 {
 	int err;
-	u8 bus_max;
-	resource_size_t busn;
-	struct resource *bus_range;
-	struct device *dev = pci->host.dev.parent;
-	struct device_node *np = dev->of_node;
-	u32 sz = 1 << pci->cfg.ops->bus_shift;
+	struct resource cfgres;
+	struct resource *bus_range = NULL;
+	struct pci_config_window *cfg;
 
-	err = of_address_to_resource(np, 0, &pci->cfg.res);
+	/* Parse our PCI ranges and request their resources */
+	err = gen_pci_parse_request_of_pci_ranges(dev, resources, &bus_range);
+	if (err)
+		goto err_out;
+
+	err = of_address_to_resource(dev->of_node, 0, &cfgres);
 	if (err) {
 		dev_err(dev, "missing \"reg\" property\n");
-		return err;
+		goto err_out;
 	}
 
-	/* Limit the bus-range to fit within reg */
-	bus_max = pci->cfg.bus_range->start +
-		  (resource_size(&pci->cfg.res) >> pci->cfg.ops->bus_shift) - 1;
-	pci->cfg.bus_range->end = min_t(resource_size_t,
-					pci->cfg.bus_range->end, bus_max);
-
-	pci->cfg.win = devm_kcalloc(dev, resource_size(pci->cfg.bus_range),
-				    sizeof(*pci->cfg.win), GFP_KERNEL);
-	if (!pci->cfg.win)
-		return -ENOMEM;
-
-	/* Map our Configuration Space windows */
-	if (!devm_request_mem_region(dev, pci->cfg.res.start,
-				     resource_size(&pci->cfg.res),
-				     "Configuration Space"))
-		return -ENOMEM;
-
-	bus_range = pci->cfg.bus_range;
-	for (busn = bus_range->start; busn <= bus_range->end; ++busn) {
-		u32 idx = busn - bus_range->start;
-
-		pci->cfg.win[idx] = devm_ioremap(dev,
-						 pci->cfg.res.start + idx * sz,
-						 sz);
-		if (!pci->cfg.win[idx])
-			return -ENOMEM;
+	cfg = pci_ecam_create(dev, &cfgres, bus_range, ops);
+	if (IS_ERR(cfg)) {
+		err = PTR_ERR(cfg);
+		goto err_out;
 	}
 
-	return 0;
+	err = devm_add_action(dev, gen_pci_unmap_cfg, cfg);
+	if (err) {
+		gen_pci_unmap_cfg(cfg);
+		goto err_out;
+	}
+	return cfg;
+
+err_out:
+	pci_free_resource_list(resources);
+	return ERR_PTR(err);
 }
 
 int pci_host_common_probe(struct platform_device *pdev,
-			  struct gen_pci *pci)
+			  struct pci_ecam_ops *ops)
 {
-	int err;
 	const char *type;
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
 	struct pci_bus *bus, *child;
+	struct pci_config_window *cfg;
+	struct list_head resources;
 
 	type = of_get_property(np, "device_type", NULL);
 	if (!type || strcmp(type, "pci")) {
@@ -147,29 +136,18 @@
 
 	of_pci_check_probe_only();
 
-	pci->host.dev.parent = dev;
-	INIT_LIST_HEAD(&pci->host.windows);
-	INIT_LIST_HEAD(&pci->resources);
-
-	/* Parse our PCI ranges and request their resources */
-	err = gen_pci_parse_request_of_pci_ranges(pci);
-	if (err)
-		return err;
-
 	/* Parse and map our Configuration Space windows */
-	err = gen_pci_parse_map_cfg_windows(pci);
-	if (err) {
-		gen_pci_release_of_pci_ranges(pci);
-		return err;
-	}
+	INIT_LIST_HEAD(&resources);
+	cfg = gen_pci_init(dev, &resources, ops);
+	if (IS_ERR(cfg))
+		return PTR_ERR(cfg);
 
 	/* Do not reassign resources if probe only */
 	if (!pci_has_flag(PCI_PROBE_ONLY))
 		pci_add_flags(PCI_REASSIGN_ALL_RSRC | PCI_REASSIGN_ALL_BUS);
 
-
-	bus = pci_scan_root_bus(dev, pci->cfg.bus_range->start,
-				&pci->cfg.ops->ops, pci, &pci->resources);
+	bus = pci_scan_root_bus(dev, cfg->busr.start, &ops->pci_ops, cfg,
+				&resources);
 	if (!bus) {
 		dev_err(dev, "Scanning rootbus failed");
 		return -ENODEV;
diff --git a/drivers/pci/host/pci-host-common.h b/drivers/pci/host/pci-host-common.h
deleted file mode 100644
index 09f3fa0..0000000
--- a/drivers/pci/host/pci-host-common.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * Copyright (C) 2014 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#ifndef _PCI_HOST_COMMON_H
-#define _PCI_HOST_COMMON_H
-
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-
-struct gen_pci_cfg_bus_ops {
-	u32 bus_shift;
-	struct pci_ops ops;
-};
-
-struct gen_pci_cfg_windows {
-	struct resource				res;
-	struct resource				*bus_range;
-	void __iomem				**win;
-
-	struct gen_pci_cfg_bus_ops		*ops;
-};
-
-struct gen_pci {
-	struct pci_host_bridge			host;
-	struct gen_pci_cfg_windows		cfg;
-	struct list_head			resources;
-};
-
-int pci_host_common_probe(struct platform_device *pdev,
-			  struct gen_pci *pci);
-
-#endif /* _PCI_HOST_COMMON_H */
diff --git a/drivers/pci/host/pci-host-generic.c b/drivers/pci/host/pci-host-generic.c
index e8aa78f..6eaceab 100644
--- a/drivers/pci/host/pci-host-generic.c
+++ b/drivers/pci/host/pci-host-generic.c
@@ -25,41 +25,12 @@
 #include <linux/of_pci.h>
 #include <linux/platform_device.h>
 
-#include "pci-host-common.h"
+#include "../ecam.h"
 
-static void __iomem *gen_pci_map_cfg_bus_cam(struct pci_bus *bus,
-					     unsigned int devfn,
-					     int where)
-{
-	struct gen_pci *pci = bus->sysdata;
-	resource_size_t idx = bus->number - pci->cfg.bus_range->start;
-
-	return pci->cfg.win[idx] + ((devfn << 8) | where);
-}
-
-static struct gen_pci_cfg_bus_ops gen_pci_cfg_cam_bus_ops = {
+static struct pci_ecam_ops gen_pci_cfg_cam_bus_ops = {
 	.bus_shift	= 16,
-	.ops		= {
-		.map_bus	= gen_pci_map_cfg_bus_cam,
-		.read		= pci_generic_config_read,
-		.write		= pci_generic_config_write,
-	}
-};
-
-static void __iomem *gen_pci_map_cfg_bus_ecam(struct pci_bus *bus,
-					      unsigned int devfn,
-					      int where)
-{
-	struct gen_pci *pci = bus->sysdata;
-	resource_size_t idx = bus->number - pci->cfg.bus_range->start;
-
-	return pci->cfg.win[idx] + ((devfn << 12) | where);
-}
-
-static struct gen_pci_cfg_bus_ops gen_pci_cfg_ecam_bus_ops = {
-	.bus_shift	= 20,
-	.ops		= {
-		.map_bus	= gen_pci_map_cfg_bus_ecam,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
 		.read		= pci_generic_config_read,
 		.write		= pci_generic_config_write,
 	}
@@ -70,25 +41,22 @@
 	  .data = &gen_pci_cfg_cam_bus_ops },
 
 	{ .compatible = "pci-host-ecam-generic",
-	  .data = &gen_pci_cfg_ecam_bus_ops },
+	  .data = &pci_generic_ecam_ops },
 
 	{ },
 };
+
 MODULE_DEVICE_TABLE(of, gen_pci_of_match);
 
 static int gen_pci_probe(struct platform_device *pdev)
 {
-	struct device *dev = &pdev->dev;
 	const struct of_device_id *of_id;
-	struct gen_pci *pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	struct pci_ecam_ops *ops;
 
-	if (!pci)
-		return -ENOMEM;
+	of_id = of_match_node(gen_pci_of_match, pdev->dev.of_node);
+	ops = (struct pci_ecam_ops *)of_id->data;
 
-	of_id = of_match_node(gen_pci_of_match, dev->of_node);
-	pci->cfg.ops = (struct gen_pci_cfg_bus_ops *)of_id->data;
-
-	return pci_host_common_probe(pdev, pci);
+	return pci_host_common_probe(pdev, ops);
 }
 
 static struct platform_driver gen_pci_driver = {
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index ed651ba..58f7eeb 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -553,6 +553,8 @@
 		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
 		/* Choose the function to be read. (See comment above) */
 		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
+		/* Make sure the function was chosen before we start reading. */
+		mb();
 		/* Read from that function's config space. */
 		switch (size) {
 		case 1:
@@ -565,6 +567,11 @@
 			*val = readl(addr);
 			break;
 		}
+		/*
+		 * Make sure the read was done before we release the spinlock
+		 * allowing consecutive reads/writes.
+		 */
+		mb();
 		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
 	} else {
 		dev_err(&hpdev->hbus->hdev->device,
@@ -592,6 +599,8 @@
 		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
 		/* Choose the function to be written. (See comment above) */
 		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
+		/* Make sure the function was chosen before we start writing. */
+		wmb();
 		/* Write to that function's config space. */
 		switch (size) {
 		case 1:
@@ -604,6 +613,11 @@
 			writel(val, addr);
 			break;
 		}
+		/*
+		 * Make sure the write was done before we release the spinlock
+		 * allowing consecutive reads/writes.
+		 */
+		mb();
 		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
 	} else {
 		dev_err(&hpdev->hbus->hdev->device,
@@ -2268,11 +2282,6 @@
 
 	hbus = hv_get_drvdata(hdev);
 
-	ret = hv_send_resources_released(hdev);
-	if (ret)
-		dev_err(&hdev->device,
-			"Couldn't send resources released packet(s)\n");
-
 	memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet));
 	init_completion(&comp_pkt.host_event);
 	pkt.teardown_packet.completion_func = hv_pci_generic_compl;
@@ -2295,6 +2304,11 @@
 		pci_unlock_rescan_remove();
 	}
 
+	ret = hv_send_resources_released(hdev);
+	if (ret)
+		dev_err(&hdev->device,
+			"Couldn't send resources released packet(s)\n");
+
 	vmbus_close(hdev->channel);
 
 	/* Delete any children which might still exist. */
diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c
index 2f817fa..b741a36 100644
--- a/drivers/pci/host/pci-imx6.c
+++ b/drivers/pci/host/pci-imx6.c
@@ -19,6 +19,7 @@
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
 #include <linux/module.h>
 #include <linux/of_gpio.h>
+#include <linux/of_device.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -31,19 +32,29 @@
 
 #define to_imx6_pcie(x)	container_of(x, struct imx6_pcie, pp)
 
+enum imx6_pcie_variants {
+	IMX6Q,
+	IMX6SX,
+	IMX6QP,
+};
+
 struct imx6_pcie {
 	int			reset_gpio;
+	bool			gpio_active_high;
 	struct clk		*pcie_bus;
 	struct clk		*pcie_phy;
+	struct clk		*pcie_inbound_axi;
 	struct clk		*pcie;
 	struct pcie_port	pp;
 	struct regmap		*iomuxc_gpr;
+	enum imx6_pcie_variants variant;
 	void __iomem		*mem_base;
 	u32			tx_deemph_gen1;
 	u32			tx_deemph_gen2_3p5db;
 	u32			tx_deemph_gen2_6db;
 	u32			tx_swing_full;
 	u32			tx_swing_low;
+	int			link_gen;
 };
 
 /* PCIe Root Complex registers (memory-mapped) */
@@ -236,39 +247,95 @@
 	struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp);
 	u32 val, gpr1, gpr12;
 
-	/*
-	 * If the bootloader already enabled the link we need some special
-	 * handling to get the core back into a state where it is safe to
-	 * touch it for configuration.  As there is no dedicated reset signal
-	 * wired up for MX6QDL, we need to manually force LTSSM into "detect"
-	 * state before completely disabling LTSSM, which is a prerequisite
-	 * for core configuration.
-	 *
-	 * If both LTSSM_ENABLE and REF_SSP_ENABLE are active we have a strong
-	 * indication that the bootloader activated the link.
-	 */
-	regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, &gpr1);
-	regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, &gpr12);
-
-	if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) &&
-	    (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) {
-		val = readl(pp->dbi_base + PCIE_PL_PFLR);
-		val &= ~PCIE_PL_PFLR_LINK_STATE_MASK;
-		val |= PCIE_PL_PFLR_FORCE_LINK;
-		writel(val, pp->dbi_base + PCIE_PL_PFLR);
-
+	switch (imx6_pcie->variant) {
+	case IMX6SX:
 		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
-				IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
+				   IMX6SX_GPR12_PCIE_TEST_POWERDOWN,
+				   IMX6SX_GPR12_PCIE_TEST_POWERDOWN);
+		/* Force PCIe PHY reset */
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
+				   IMX6SX_GPR5_PCIE_BTNRST_RESET,
+				   IMX6SX_GPR5_PCIE_BTNRST_RESET);
+		break;
+	case IMX6QP:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_SW_RST,
+				   IMX6Q_GPR1_PCIE_SW_RST);
+		break;
+	case IMX6Q:
+		/*
+		 * If the bootloader already enabled the link we need some
+		 * special handling to get the core back into a state where
+		 * it is safe to touch it for configuration.  As there is
+		 * no dedicated reset signal wired up for MX6QDL, we need
+		 * to manually force LTSSM into "detect" state before
+		 * completely disabling LTSSM, which is a prerequisite for
+		 * core configuration.
+		 *
+		 * If both LTSSM_ENABLE and REF_SSP_ENABLE are active we
+		 * have a strong indication that the bootloader activated
+		 * the link.
+		 */
+		regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1, &gpr1);
+		regmap_read(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, &gpr12);
+
+		if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) &&
+		    (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) {
+			val = readl(pp->dbi_base + PCIE_PL_PFLR);
+			val &= ~PCIE_PL_PFLR_LINK_STATE_MASK;
+			val |= PCIE_PL_PFLR_FORCE_LINK;
+			writel(val, pp->dbi_base + PCIE_PL_PFLR);
+
+			regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+					   IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
+		}
+
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_TEST_PD, 1 << 18);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_REF_CLK_EN, 0 << 16);
+		break;
 	}
 
-	regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
-			IMX6Q_GPR1_PCIE_TEST_PD, 1 << 18);
-	regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
-			IMX6Q_GPR1_PCIE_REF_CLK_EN, 0 << 16);
-
 	return 0;
 }
 
+static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie)
+{
+	struct pcie_port *pp = &imx6_pcie->pp;
+	int ret = 0;
+
+	switch (imx6_pcie->variant) {
+	case IMX6SX:
+		ret = clk_prepare_enable(imx6_pcie->pcie_inbound_axi);
+		if (ret) {
+			dev_err(pp->dev, "unable to enable pcie_axi clock\n");
+			break;
+		}
+
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6SX_GPR12_PCIE_TEST_POWERDOWN, 0);
+		break;
+	case IMX6QP: 		/* FALLTHROUGH */
+	case IMX6Q:
+		/* power up core phy and enable ref clock */
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_TEST_PD, 0 << 18);
+		/*
+		 * The async reset input needs the ref clock to sync
+		 * internally. If the ref clock comes after reset, the
+		 * internally synced reset time is too short to meet the
+		 * requirement, so add a ~10us delay here.
+		 */
+		udelay(10);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_REF_CLK_EN, 1 << 16);
+		break;
+	}
+
+	return ret;
+}
+
 static int imx6_pcie_deassert_core_reset(struct pcie_port *pp)
 {
 	struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp);
@@ -292,43 +359,60 @@
 		goto err_pcie;
 	}
 
-	/* power up core phy and enable ref clock */
-	regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
-			IMX6Q_GPR1_PCIE_TEST_PD, 0 << 18);
-	/*
-	 * the async reset input need ref clock to sync internally,
-	 * when the ref clock comes after reset, internal synced
-	 * reset time is too short, cannot meet the requirement.
-	 * add one ~10us delay here.
-	 */
-	udelay(10);
-	regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
-			IMX6Q_GPR1_PCIE_REF_CLK_EN, 1 << 16);
+	ret = imx6_pcie_enable_ref_clk(imx6_pcie);
+	if (ret) {
+		dev_err(pp->dev, "unable to enable pcie ref clock\n");
+		goto err_ref_clk;
+	}
 
 	/* allow the clocks to stabilize */
 	usleep_range(200, 500);
 
 	/* Some boards don't have PCIe reset GPIO. */
 	if (gpio_is_valid(imx6_pcie->reset_gpio)) {
-		gpio_set_value_cansleep(imx6_pcie->reset_gpio, 0);
+		gpio_set_value_cansleep(imx6_pcie->reset_gpio,
+					imx6_pcie->gpio_active_high);
 		msleep(100);
-		gpio_set_value_cansleep(imx6_pcie->reset_gpio, 1);
+		gpio_set_value_cansleep(imx6_pcie->reset_gpio,
+					!imx6_pcie->gpio_active_high);
 	}
+
+	switch (imx6_pcie->variant) {
+	case IMX6SX:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
+				   IMX6SX_GPR5_PCIE_BTNRST_RESET, 0);
+		break;
+	case IMX6QP:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_SW_RST, 0);
+
+		usleep_range(200, 500);
+		break;
+	case IMX6Q:		/* Nothing to do */
+		break;
+	}
+
 	return 0;
 
+err_ref_clk:
+	clk_disable_unprepare(imx6_pcie->pcie);
 err_pcie:
 	clk_disable_unprepare(imx6_pcie->pcie_bus);
 err_pcie_bus:
 	clk_disable_unprepare(imx6_pcie->pcie_phy);
 err_pcie_phy:
 	return ret;
-
 }
 
 static void imx6_pcie_init_phy(struct pcie_port *pp)
 {
 	struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp);
 
+	if (imx6_pcie->variant == IMX6SX)
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6SX_GPR12_PCIE_RX_EQ_MASK,
+				   IMX6SX_GPR12_PCIE_RX_EQ_2);
+
 	regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
 			IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
 
@@ -417,11 +501,15 @@
 		goto err_reset_phy;
 	}
 
-	/* Allow Gen2 mode after the link is up. */
-	tmp = readl(pp->dbi_base + PCIE_RC_LCR);
-	tmp &= ~PCIE_RC_LCR_MAX_LINK_SPEEDS_MASK;
-	tmp |= PCIE_RC_LCR_MAX_LINK_SPEEDS_GEN2;
-	writel(tmp, pp->dbi_base + PCIE_RC_LCR);
+	if (imx6_pcie->link_gen == 2) {
+		/* Allow Gen2 mode after the link is up. */
+		tmp = readl(pp->dbi_base + PCIE_RC_LCR);
+		tmp &= ~PCIE_RC_LCR_MAX_LINK_SPEEDS_MASK;
+		tmp |= PCIE_RC_LCR_MAX_LINK_SPEEDS_GEN2;
+		writel(tmp, pp->dbi_base + PCIE_RC_LCR);
+	} else {
+		dev_info(pp->dev, "Link: Gen2 disabled\n");
+	}
 
 	/*
 	 * Start Directed Speed Change so the best possible speed both link
@@ -445,8 +533,7 @@
 	}
 
 	tmp = readl(pp->dbi_base + PCIE_RC_LCSR);
-	dev_dbg(pp->dev, "Link up, Gen=%i\n", (tmp >> 16) & 0xf);
-
+	dev_info(pp->dev, "Link up, Gen%i\n", (tmp >> 16) & 0xf);
 	return 0;
 
 err_reset_phy:
@@ -535,6 +622,9 @@
 	pp = &imx6_pcie->pp;
 	pp->dev = &pdev->dev;
 
+	imx6_pcie->variant =
+		(enum imx6_pcie_variants)of_device_get_match_data(&pdev->dev);
+
 	/* Added for PCI abort handling */
 	hook_fault_code(16 + 6, imx6q_pcie_abort_handler, SIGBUS, 0,
 		"imprecise external abort");
@@ -546,9 +636,14 @@
 
 	/* Fetch GPIOs */
 	imx6_pcie->reset_gpio = of_get_named_gpio(np, "reset-gpio", 0);
+	imx6_pcie->gpio_active_high = of_property_read_bool(np,
+						"reset-gpio-active-high");
 	if (gpio_is_valid(imx6_pcie->reset_gpio)) {
 		ret = devm_gpio_request_one(&pdev->dev, imx6_pcie->reset_gpio,
-					    GPIOF_OUT_INIT_LOW, "PCIe reset");
+				imx6_pcie->gpio_active_high ?
+					GPIOF_OUT_INIT_HIGH :
+					GPIOF_OUT_INIT_LOW,
+				"PCIe reset");
 		if (ret) {
 			dev_err(&pdev->dev, "unable to get reset gpio\n");
 			return ret;
@@ -577,6 +672,16 @@
 		return PTR_ERR(imx6_pcie->pcie);
 	}
 
+	if (imx6_pcie->variant == IMX6SX) {
+		imx6_pcie->pcie_inbound_axi = devm_clk_get(&pdev->dev,
+							   "pcie_inbound_axi");
+		if (IS_ERR(imx6_pcie->pcie_inbound_axi)) {
+			dev_err(&pdev->dev,
+				"pcie_incbound_axi clock missing or invalid\n");
+			return PTR_ERR(imx6_pcie->pcie_inbound_axi);
+		}
+	}
+
 	/* Grab GPR config register range */
 	imx6_pcie->iomuxc_gpr =
 		 syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
@@ -606,6 +711,12 @@
 				 &imx6_pcie->tx_swing_low))
 		imx6_pcie->tx_swing_low = 127;
 
+	/* Limit link speed */
+	ret = of_property_read_u32(pp->dev->of_node, "fsl,max-link-speed",
+				   &imx6_pcie->link_gen);
+	if (ret)
+		imx6_pcie->link_gen = 1;
+
 	ret = imx6_add_pcie_port(pp, pdev);
 	if (ret < 0)
 		return ret;
@@ -623,7 +734,9 @@
 }
 
 static const struct of_device_id imx6_pcie_of_match[] = {
-	{ .compatible = "fsl,imx6q-pcie", },
+	{ .compatible = "fsl,imx6q-pcie",  .data = (void *)IMX6Q,  },
+	{ .compatible = "fsl,imx6sx-pcie", .data = (void *)IMX6SX, },
+	{ .compatible = "fsl,imx6qp-pcie", .data = (void *)IMX6QP, },
 	{},
 };
 MODULE_DEVICE_TABLE(of, imx6_pcie_of_match);
diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c
index 6153853..4151509 100644
--- a/drivers/pci/host/pci-keystone-dw.c
+++ b/drivers/pci/host/pci-keystone-dw.c
@@ -14,6 +14,7 @@
 
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
+#include <linux/irqreturn.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_pci.h>
@@ -53,6 +54,21 @@
 #define IRQ_STATUS			0x184
 #define MSI_IRQ_OFFSET			4
 
+/* Error IRQ bits */
+#define ERR_AER		BIT(5)	/* ECRC error */
+#define ERR_AXI		BIT(4)	/* AXI tag lookup fatal error */
+#define ERR_CORR	BIT(3)	/* Correctable error */
+#define ERR_NONFATAL	BIT(2)	/* Non-fatal error */
+#define ERR_FATAL	BIT(1)	/* Fatal error */
+#define ERR_SYS		BIT(0)	/* System (fatal, non-fatal, or correctable) */
+#define ERR_IRQ_ALL	(ERR_AER | ERR_AXI | ERR_CORR | \
+			 ERR_NONFATAL | ERR_FATAL | ERR_SYS)
+#define ERR_FATAL_IRQ	(ERR_FATAL | ERR_AXI)
+#define ERR_IRQ_STATUS_RAW		0x1c0
+#define ERR_IRQ_STATUS			0x1c4
+#define ERR_IRQ_ENABLE_SET		0x1c8
+#define ERR_IRQ_ENABLE_CLR		0x1cc
+
 /* Config space registers */
 #define DEBUG0				0x728
 
@@ -243,6 +259,28 @@
 	writel(offset, ks_pcie->va_app_base + IRQ_EOI);
 }
 
+void ks_dw_pcie_enable_error_irq(void __iomem *reg_base)
+{
+	writel(ERR_IRQ_ALL, reg_base + ERR_IRQ_ENABLE_SET);
+}
+
+irqreturn_t ks_dw_pcie_handle_error_irq(struct device *dev,
+					void __iomem *reg_base)
+{
+	u32 status;
+
+	status = readl(reg_base + ERR_IRQ_STATUS_RAW) & ERR_IRQ_ALL;
+	if (!status)
+		return IRQ_NONE;
+
+	if (status & ERR_FATAL_IRQ)
+		dev_err(dev, "fatal error (status %#010x)\n", status);
+
+	/* Ack the IRQ; status bits are RW1C */
+	writel(status, reg_base + ERR_IRQ_STATUS);
+	return IRQ_HANDLED;
+}
+
 static void ks_dw_pcie_ack_legacy_irq(struct irq_data *d)
 {
 }
diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c
index b71f55b..6b8301e 100644
--- a/drivers/pci/host/pci-keystone.c
+++ b/drivers/pci/host/pci-keystone.c
@@ -15,6 +15,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/msi.h>
@@ -159,7 +160,7 @@
 static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie,
 					   char *controller, int *num_irqs)
 {
-	int temp, max_host_irqs, legacy = 1, *host_irqs, ret = -EINVAL;
+	int temp, max_host_irqs, legacy = 1, *host_irqs;
 	struct device *dev = ks_pcie->pp.dev;
 	struct device_node *np_pcie = dev->of_node, **np_temp;
 
@@ -180,11 +181,15 @@
 	*np_temp = of_find_node_by_name(np_pcie, controller);
 	if (!(*np_temp)) {
 		dev_err(dev, "Node for %s is absent\n", controller);
-		goto out;
+		return -EINVAL;
 	}
+
 	temp = of_irq_count(*np_temp);
-	if (!temp)
-		goto out;
+	if (!temp) {
+		dev_err(dev, "No IRQ entries in %s\n", controller);
+		return -EINVAL;
+	}
+
 	if (temp > max_host_irqs)
 		dev_warn(dev, "Too many %s interrupts defined %u\n",
 			(legacy ? "legacy" : "MSI"), temp);
@@ -198,12 +203,13 @@
 		if (!host_irqs[temp])
 			break;
 	}
+
 	if (temp) {
 		*num_irqs = temp;
-		ret = 0;
+		return 0;
 	}
-out:
-	return ret;
+
+	return -EINVAL;
 }
 
 static void ks_pcie_setup_interrupts(struct keystone_pcie *ks_pcie)
@@ -226,6 +232,9 @@
 							 ks_pcie);
 		}
 	}
+
+	if (ks_pcie->error_irq > 0)
+		ks_dw_pcie_enable_error_irq(ks_pcie->va_app_base);
 }
 
 /*
@@ -289,6 +298,14 @@
 	.scan_bus = ks_dw_pcie_v3_65_scan_bus,
 };
 
+static irqreturn_t pcie_err_irq_handler(int irq, void *priv)
+{
+	struct keystone_pcie *ks_pcie = priv;
+
+	return ks_dw_pcie_handle_error_irq(ks_pcie->pp.dev,
+					   ks_pcie->va_app_base);
+}
+
 static int __init ks_add_pcie_port(struct keystone_pcie *ks_pcie,
 			 struct platform_device *pdev)
 {
@@ -309,6 +326,22 @@
 			return ret;
 	}
 
+	/*
+	 * Index 0 is the platform interrupt for error interrupt
+	 * from RC.  This is optional.
+	 */
+	ks_pcie->error_irq = irq_of_parse_and_map(ks_pcie->np, 0);
+	if (ks_pcie->error_irq <= 0)
+		dev_info(&pdev->dev, "no error IRQ defined\n");
+	else {
+		if (request_irq(ks_pcie->error_irq, pcie_err_irq_handler,
+				IRQF_SHARED, "pcie-error-irq", ks_pcie) < 0) {
+			dev_err(&pdev->dev, "failed to request error IRQ %d\n",
+				ks_pcie->error_irq);
+			return ret;
+		}
+	}
+
 	pp->root_bus_nr = -1;
 	pp->ops = &keystone_pcie_host_ops;
 	ret = ks_dw_pcie_host_init(ks_pcie, ks_pcie->msi_intc_np);
@@ -317,7 +350,7 @@
 		return ret;
 	}
 
-	return ret;
+	return 0;
 }
 
 static const struct of_device_id ks_pcie_of_match[] = {
@@ -346,7 +379,7 @@
 	struct resource *res;
 	void __iomem *reg_p;
 	struct phy *phy;
-	int ret = 0;
+	int ret;
 
 	ks_pcie = devm_kzalloc(&pdev->dev, sizeof(*ks_pcie),
 				GFP_KERNEL);
@@ -376,6 +409,7 @@
 	devm_release_mem_region(dev, res->start, resource_size(res));
 
 	pp->dev = dev;
+	ks_pcie->np = dev->of_node;
 	platform_set_drvdata(pdev, ks_pcie);
 	ks_pcie->clk = devm_clk_get(dev, "pcie");
 	if (IS_ERR(ks_pcie->clk)) {
diff --git a/drivers/pci/host/pci-keystone.h b/drivers/pci/host/pci-keystone.h
index f0944e8..a5b0cb2 100644
--- a/drivers/pci/host/pci-keystone.h
+++ b/drivers/pci/host/pci-keystone.h
@@ -29,6 +29,9 @@
 	int			msi_host_irqs[MAX_MSI_HOST_IRQS];
 	struct			device_node *msi_intc_np;
 	struct irq_domain	*legacy_irq_domain;
+	struct device_node	*np;
+
+	int error_irq;
 
 	/* Application register space */
 	void __iomem		*va_app_base;
@@ -42,6 +45,9 @@
 /* Keystone specific PCI controller APIs */
 void ks_dw_pcie_enable_legacy_irqs(struct keystone_pcie *ks_pcie);
 void ks_dw_pcie_handle_legacy_irq(struct keystone_pcie *ks_pcie, int offset);
+void ks_dw_pcie_enable_error_irq(void __iomem *reg_base);
+irqreturn_t ks_dw_pcie_handle_error_irq(struct device *dev,
+					void __iomem *reg_base);
 int  ks_dw_pcie_host_init(struct keystone_pcie *ks_pcie,
 			struct device_node *msi_intc_np);
 int ks_dw_pcie_wr_other_conf(struct pcie_port *pp, struct pci_bus *bus,
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 53b79c5..6b451df 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -1003,6 +1003,7 @@
 		pcie->msi->dev = &pcie->pdev->dev;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int mvebu_pcie_suspend(struct device *dev)
 {
 	struct mvebu_pcie *pcie;
@@ -1031,6 +1032,7 @@
 
 	return 0;
 }
+#endif
 
 static void mvebu_pcie_port_clk_put(void *data)
 {
@@ -1298,9 +1300,8 @@
 };
 MODULE_DEVICE_TABLE(of, mvebu_pcie_of_match_table);
 
-static struct dev_pm_ops mvebu_pcie_pm_ops = {
-	.suspend_noirq = mvebu_pcie_suspend,
-	.resume_noirq = mvebu_pcie_resume,
+static const struct dev_pm_ops mvebu_pcie_pm_ops = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mvebu_pcie_suspend, mvebu_pcie_resume)
 };
 
 static struct platform_driver mvebu_pcie_driver = {
diff --git a/drivers/pci/host/pci-thunder-ecam.c b/drivers/pci/host/pci-thunder-ecam.c
index d71935cb..540d030 100644
--- a/drivers/pci/host/pci-thunder-ecam.c
+++ b/drivers/pci/host/pci-thunder-ecam.c
@@ -13,18 +13,7 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 
-#include "pci-host-common.h"
-
-/* Mapping is standard ECAM */
-static void __iomem *thunder_ecam_map_bus(struct pci_bus *bus,
-					  unsigned int devfn,
-					  int where)
-{
-	struct gen_pci *pci = bus->sysdata;
-	resource_size_t idx = bus->number - pci->cfg.bus_range->start;
-
-	return pci->cfg.win[idx] + ((devfn << 12) | where);
-}
+#include "../ecam.h"
 
 static void set_val(u32 v, int where, int size, u32 *val)
 {
@@ -99,7 +88,7 @@
 static int thunder_ecam_p2_config_read(struct pci_bus *bus, unsigned int devfn,
 				       int where, int size, u32 *val)
 {
-	struct gen_pci *pci = bus->sysdata;
+	struct pci_config_window *cfg = bus->sysdata;
 	int where_a = where & ~3;
 	void __iomem *addr;
 	u32 node_bits;
@@ -129,7 +118,7 @@
 	 * the config space access window.  Since we are working with
 	 * the high-order 32 bits, shift everything down by 32 bits.
 	 */
-	node_bits = (pci->cfg.res.start >> 32) & (1 << 12);
+	node_bits = (cfg->res.start >> 32) & (1 << 12);
 
 	v |= node_bits;
 	set_val(v, where, size, val);
@@ -358,36 +347,24 @@
 	return pci_generic_config_write(bus, devfn, where, size, val);
 }
 
-static struct gen_pci_cfg_bus_ops thunder_ecam_bus_ops = {
+static struct pci_ecam_ops pci_thunder_ecam_ops = {
 	.bus_shift	= 20,
-	.ops		= {
-		.map_bus        = thunder_ecam_map_bus,
+	.pci_ops	= {
+		.map_bus        = pci_ecam_map_bus,
 		.read           = thunder_ecam_config_read,
 		.write          = thunder_ecam_config_write,
 	}
 };
 
 static const struct of_device_id thunder_ecam_of_match[] = {
-	{ .compatible = "cavium,pci-host-thunder-ecam",
-	  .data = &thunder_ecam_bus_ops },
-
+	{ .compatible = "cavium,pci-host-thunder-ecam" },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, thunder_ecam_of_match);
 
 static int thunder_ecam_probe(struct platform_device *pdev)
 {
-	struct device *dev = &pdev->dev;
-	const struct of_device_id *of_id;
-	struct gen_pci *pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
-
-	if (!pci)
-		return -ENOMEM;
-
-	of_id = of_match_node(thunder_ecam_of_match, dev->of_node);
-	pci->cfg.ops = (struct gen_pci_cfg_bus_ops *)of_id->data;
-
-	return pci_host_common_probe(pdev, pci);
+	return pci_host_common_probe(pdev, &pci_thunder_ecam_ops);
 }
 
 static struct platform_driver thunder_ecam_driver = {
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
index cabb92a..9b8ab94 100644
--- a/drivers/pci/host/pci-thunder-pem.c
+++ b/drivers/pci/host/pci-thunder-pem.c
@@ -20,34 +20,22 @@
 #include <linux/of_pci.h>
 #include <linux/platform_device.h>
 
-#include "pci-host-common.h"
+#include "../ecam.h"
 
 #define PEM_CFG_WR 0x28
 #define PEM_CFG_RD 0x30
 
 struct thunder_pem_pci {
-	struct gen_pci	gen_pci;
 	u32		ea_entry[3];
 	void __iomem	*pem_reg_base;
 };
 
-static void __iomem *thunder_pem_map_bus(struct pci_bus *bus,
-					 unsigned int devfn, int where)
-{
-	struct gen_pci *pci = bus->sysdata;
-	resource_size_t idx = bus->number - pci->cfg.bus_range->start;
-
-	return pci->cfg.win[idx] + ((devfn << 16) | where);
-}
-
 static int thunder_pem_bridge_read(struct pci_bus *bus, unsigned int devfn,
 				   int where, int size, u32 *val)
 {
 	u64 read_val;
-	struct thunder_pem_pci *pem_pci;
-	struct gen_pci *pci = bus->sysdata;
-
-	pem_pci = container_of(pci, struct thunder_pem_pci, gen_pci);
+	struct pci_config_window *cfg = bus->sysdata;
+	struct thunder_pem_pci *pem_pci = (struct thunder_pem_pci *)cfg->priv;
 
 	if (devfn != 0 || where >= 2048) {
 		*val = ~0;
@@ -132,17 +120,17 @@
 static int thunder_pem_config_read(struct pci_bus *bus, unsigned int devfn,
 				   int where, int size, u32 *val)
 {
-	struct gen_pci *pci = bus->sysdata;
+	struct pci_config_window *cfg = bus->sysdata;
 
-	if (bus->number < pci->cfg.bus_range->start ||
-	    bus->number > pci->cfg.bus_range->end)
+	if (bus->number < cfg->busr.start ||
+	    bus->number > cfg->busr.end)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	/*
 	 * The first device on the bus is the PEM PCIe bridge.
 	 * Special case its config access.
 	 */
-	if (bus->number == pci->cfg.bus_range->start)
+	if (bus->number == cfg->busr.start)
 		return thunder_pem_bridge_read(bus, devfn, where, size, val);
 
 	return pci_generic_config_read(bus, devfn, where, size, val);
@@ -153,11 +141,11 @@
  * reserved bits, this makes the code simpler and is OK as the bits
  * are not affected by writing zeros to them.
  */
-static u32 thunder_pem_bridge_w1c_bits(int where)
+static u32 thunder_pem_bridge_w1c_bits(u64 where_aligned)
 {
 	u32 w1c_bits = 0;
 
-	switch (where & ~3) {
+	switch (where_aligned) {
 	case 0x04: /* Command/Status */
 	case 0x1c: /* Base and I/O Limit/Secondary Status */
 		w1c_bits = 0xff000000;
@@ -184,15 +172,36 @@
 	return w1c_bits;
 }
 
+/* Some bits must be written to one so they appear to be read-only. */
+static u32 thunder_pem_bridge_w1_bits(u64 where_aligned)
+{
+	u32 w1_bits;
+
+	switch (where_aligned) {
+	case 0x1c: /* I/O Base / I/O Limit, Secondary Status */
+		/* Force 32-bit I/O addressing. */
+		w1_bits = 0x0101;
+		break;
+	case 0x24: /* Prefetchable Memory Base / Prefetchable Memory Limit */
+		/* Force 64-bit addressing */
+		w1_bits = 0x00010001;
+		break;
+	default:
+		w1_bits = 0;
+		break;
+	}
+	return w1_bits;
+}
+
 static int thunder_pem_bridge_write(struct pci_bus *bus, unsigned int devfn,
 				    int where, int size, u32 val)
 {
-	struct gen_pci *pci = bus->sysdata;
-	struct thunder_pem_pci *pem_pci;
+	struct pci_config_window *cfg = bus->sysdata;
+	struct thunder_pem_pci *pem_pci = (struct thunder_pem_pci *)cfg->priv;
 	u64 write_val, read_val;
+	u64 where_aligned = where & ~3ull;
 	u32 mask = 0;
 
-	pem_pci = container_of(pci, struct thunder_pem_pci, gen_pci);
 
 	if (devfn != 0 || where >= 2048)
 		return PCIBIOS_DEVICE_NOT_FOUND;
@@ -205,8 +214,7 @@
 	 */
 	switch (size) {
 	case 1:
-		read_val = where & ~3ull;
-		writeq(read_val, pem_pci->pem_reg_base + PEM_CFG_RD);
+		writeq(where_aligned, pem_pci->pem_reg_base + PEM_CFG_RD);
 		read_val = readq(pem_pci->pem_reg_base + PEM_CFG_RD);
 		read_val >>= 32;
 		mask = ~(0xff << (8 * (where & 3)));
@@ -215,8 +223,7 @@
 		val |= (u32)read_val;
 		break;
 	case 2:
-		read_val = where & ~3ull;
-		writeq(read_val, pem_pci->pem_reg_base + PEM_CFG_RD);
+		writeq(where_aligned, pem_pci->pem_reg_base + PEM_CFG_RD);
 		read_val = readq(pem_pci->pem_reg_base + PEM_CFG_RD);
 		read_val >>= 32;
 		mask = ~(0xffff << (8 * (where & 3)));
@@ -244,11 +251,17 @@
 	}
 
 	/*
+	 * Some bits must be read-only with value of one.  Since the
+	 * access method allows these to be cleared if a zero is
+	 * written, force them to one before writing.
+	 */
+	val |= thunder_pem_bridge_w1_bits(where_aligned);
+
+	/*
 	 * Low order bits are the config address, the high order 32
 	 * bits are the data to be written.
 	 */
-	write_val = where & ~3ull;
-	write_val |= (((u64)val) << 32);
+	write_val = (((u64)val) << 32) | where_aligned;
 	writeq(write_val, pem_pci->pem_reg_base + PEM_CFG_WR);
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -256,53 +269,38 @@
 static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
 				    int where, int size, u32 val)
 {
-	struct gen_pci *pci = bus->sysdata;
+	struct pci_config_window *cfg = bus->sysdata;
 
-	if (bus->number < pci->cfg.bus_range->start ||
-	    bus->number > pci->cfg.bus_range->end)
+	if (bus->number < cfg->busr.start ||
+	    bus->number > cfg->busr.end)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 	/*
 	 * The first device on the bus is the PEM PCIe bridge.
 	 * Special case its config access.
 	 */
-	if (bus->number == pci->cfg.bus_range->start)
+	if (bus->number == cfg->busr.start)
 		return thunder_pem_bridge_write(bus, devfn, where, size, val);
 
 
 	return pci_generic_config_write(bus, devfn, where, size, val);
 }
 
-static struct gen_pci_cfg_bus_ops thunder_pem_bus_ops = {
-	.bus_shift	= 24,
-	.ops		= {
-		.map_bus	= thunder_pem_map_bus,
-		.read		= thunder_pem_config_read,
-		.write		= thunder_pem_config_write,
-	}
-};
-
-static const struct of_device_id thunder_pem_of_match[] = {
-	{ .compatible = "cavium,pci-host-thunder-pem",
-	  .data = &thunder_pem_bus_ops },
-
-	{ },
-};
-MODULE_DEVICE_TABLE(of, thunder_pem_of_match);
-
-static int thunder_pem_probe(struct platform_device *pdev)
+static int thunder_pem_init(struct device *dev, struct pci_config_window *cfg)
 {
-	struct device *dev = &pdev->dev;
-	const struct of_device_id *of_id;
 	resource_size_t bar4_start;
 	struct resource *res_pem;
 	struct thunder_pem_pci *pem_pci;
+	struct platform_device *pdev;
+
+	/* Only OF support for now */
+	if (!dev->of_node)
+		return -EINVAL;
 
 	pem_pci = devm_kzalloc(dev, sizeof(*pem_pci), GFP_KERNEL);
 	if (!pem_pci)
 		return -ENOMEM;
 
-	of_id = of_match_node(thunder_pem_of_match, dev->of_node);
-	pem_pci->gen_pci.cfg.ops = (struct gen_pci_cfg_bus_ops *)of_id->data;
+	pdev = to_platform_device(dev);
 
 	/*
 	 * The second register range is the PEM bridge to the PCIe
@@ -330,7 +328,29 @@
 	pem_pci->ea_entry[1] = (u32)(res_pem->end - bar4_start) & ~3u;
 	pem_pci->ea_entry[2] = (u32)(bar4_start >> 32);
 
-	return pci_host_common_probe(pdev, &pem_pci->gen_pci);
+	cfg->priv = pem_pci;
+	return 0;
+}
+
+static struct pci_ecam_ops pci_thunder_pem_ops = {
+	.bus_shift	= 24,
+	.init		= thunder_pem_init,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= thunder_pem_config_read,
+		.write		= thunder_pem_config_write,
+	}
+};
+
+static const struct of_device_id thunder_pem_of_match[] = {
+	{ .compatible = "cavium,pci-host-thunder-pem" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, thunder_pem_of_match);
+
+static int thunder_pem_probe(struct platform_device *pdev)
+{
+	return pci_host_common_probe(pdev, &pci_thunder_pem_ops);
 }
 
 static struct platform_driver thunder_pem_driver = {
diff --git a/drivers/pci/host/pcie-armada8k.c b/drivers/pci/host/pcie-armada8k.c
new file mode 100644
index 0000000..5572356
--- /dev/null
+++ b/drivers/pci/host/pcie-armada8k.c
@@ -0,0 +1,262 @@
+/*
+ * PCIe host controller driver for Marvell Armada-8K SoCs
+ *
+ * Armada-8K PCIe Glue Layer Source Code
+ *
+ * Copyright (C) 2016 Marvell Technology Group Ltd.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/of_pci.h>
+#include <linux/of_irq.h>
+
+#include "pcie-designware.h"
+
+struct armada8k_pcie {
+	void __iomem *base;
+	struct clk *clk;
+	struct pcie_port pp;
+};
+
+#define PCIE_VENDOR_REGS_OFFSET		0x8000
+
+#define PCIE_GLOBAL_CONTROL_REG		0x0
+#define PCIE_APP_LTSSM_EN		BIT(2)
+#define PCIE_DEVICE_TYPE_SHIFT		4
+#define PCIE_DEVICE_TYPE_MASK		0xF
+#define PCIE_DEVICE_TYPE_RC		0x4 /* Root complex */
+
+#define PCIE_GLOBAL_STATUS_REG		0x8
+#define PCIE_GLB_STS_RDLH_LINK_UP	BIT(1)
+#define PCIE_GLB_STS_PHY_LINK_UP	BIT(9)
+
+#define PCIE_GLOBAL_INT_CAUSE1_REG	0x1C
+#define PCIE_GLOBAL_INT_MASK1_REG	0x20
+#define PCIE_INT_A_ASSERT_MASK		BIT(9)
+#define PCIE_INT_B_ASSERT_MASK		BIT(10)
+#define PCIE_INT_C_ASSERT_MASK		BIT(11)
+#define PCIE_INT_D_ASSERT_MASK		BIT(12)
+
+#define PCIE_ARCACHE_TRC_REG		0x50
+#define PCIE_AWCACHE_TRC_REG		0x54
+#define PCIE_ARUSER_REG			0x5C
+#define PCIE_AWUSER_REG			0x60
+/*
+ * AR/AW Cache defaults: Normal memory, Write-Back, Read / Write
+ * allocate
+ */
+#define ARCACHE_DEFAULT_VALUE		0x3511
+#define AWCACHE_DEFAULT_VALUE		0x5311
+
+#define DOMAIN_OUTER_SHAREABLE		0x2
+#define AX_USER_DOMAIN_MASK		0x3
+#define AX_USER_DOMAIN_SHIFT		4
+
+#define to_armada8k_pcie(x)	container_of(x, struct armada8k_pcie, pp)
+
+static int armada8k_pcie_link_up(struct pcie_port *pp)
+{
+	struct armada8k_pcie *pcie = to_armada8k_pcie(pp);
+	u32 reg;
+	u32 mask = PCIE_GLB_STS_RDLH_LINK_UP | PCIE_GLB_STS_PHY_LINK_UP;
+
+	reg = readl(pcie->base + PCIE_GLOBAL_STATUS_REG);
+
+	if ((reg & mask) == mask)
+		return 1;
+
+	dev_dbg(pp->dev, "No link detected (Global-Status: 0x%08x).\n", reg);
+	return 0;
+}
+
+static void armada8k_pcie_establish_link(struct pcie_port *pp)
+{
+	struct armada8k_pcie *pcie = to_armada8k_pcie(pp);
+	void __iomem *base = pcie->base;
+	u32 reg;
+
+	if (!dw_pcie_link_up(pp)) {
+		/* Disable LTSSM state machine to enable configuration */
+		reg = readl(base + PCIE_GLOBAL_CONTROL_REG);
+		reg &= ~(PCIE_APP_LTSSM_EN);
+		writel(reg, base + PCIE_GLOBAL_CONTROL_REG);
+	}
+
+	/* Set the device to root complex mode */
+	reg = readl(base + PCIE_GLOBAL_CONTROL_REG);
+	reg &= ~(PCIE_DEVICE_TYPE_MASK << PCIE_DEVICE_TYPE_SHIFT);
+	reg |= PCIE_DEVICE_TYPE_RC << PCIE_DEVICE_TYPE_SHIFT;
+	writel(reg, base + PCIE_GLOBAL_CONTROL_REG);
+
+	/* Set the PCIe master AxCache attributes */
+	writel(ARCACHE_DEFAULT_VALUE, base + PCIE_ARCACHE_TRC_REG);
+	writel(AWCACHE_DEFAULT_VALUE, base + PCIE_AWCACHE_TRC_REG);
+
+	/* Set the PCIe master AxDomain attributes */
+	reg = readl(base + PCIE_ARUSER_REG);
+	reg &= ~(AX_USER_DOMAIN_MASK << AX_USER_DOMAIN_SHIFT);
+	reg |= DOMAIN_OUTER_SHAREABLE << AX_USER_DOMAIN_SHIFT;
+	writel(reg, base + PCIE_ARUSER_REG);
+
+	reg = readl(base + PCIE_AWUSER_REG);
+	reg &= ~(AX_USER_DOMAIN_MASK << AX_USER_DOMAIN_SHIFT);
+	reg |= DOMAIN_OUTER_SHAREABLE << AX_USER_DOMAIN_SHIFT;
+	writel(reg, base + PCIE_AWUSER_REG);
+
+	/* Enable INT A-D interrupts */
+	reg = readl(base + PCIE_GLOBAL_INT_MASK1_REG);
+	reg |= PCIE_INT_A_ASSERT_MASK | PCIE_INT_B_ASSERT_MASK |
+	       PCIE_INT_C_ASSERT_MASK | PCIE_INT_D_ASSERT_MASK;
+	writel(reg, base + PCIE_GLOBAL_INT_MASK1_REG);
+
+	if (!dw_pcie_link_up(pp)) {
+		/* Configuration done. Start LTSSM */
+		reg = readl(base + PCIE_GLOBAL_CONTROL_REG);
+		reg |= PCIE_APP_LTSSM_EN;
+		writel(reg, base + PCIE_GLOBAL_CONTROL_REG);
+	}
+
+	/* Wait until the link becomes active again */
+	if (dw_pcie_wait_for_link(pp))
+		dev_err(pp->dev, "Link not up after reconfiguration\n");
+}
+
+static void armada8k_pcie_host_init(struct pcie_port *pp)
+{
+	dw_pcie_setup_rc(pp);
+	armada8k_pcie_establish_link(pp);
+}
+
+static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg)
+{
+	struct pcie_port *pp = arg;
+	struct armada8k_pcie *pcie = to_armada8k_pcie(pp);
+	void __iomem *base = pcie->base;
+	u32 val;
+
+	/*
+	 * Interrupts are directly handled by the device driver of the
+	 * PCI device. However, they are also latched into the PCIe
+	 * controller, so we simply discard them.
+	 */
+	val = readl(base + PCIE_GLOBAL_INT_CAUSE1_REG);
+	writel(val, base + PCIE_GLOBAL_INT_CAUSE1_REG);
+
+	return IRQ_HANDLED;
+}
+
+static struct pcie_host_ops armada8k_pcie_host_ops = {
+	.link_up = armada8k_pcie_link_up,
+	.host_init = armada8k_pcie_host_init,
+};
+
+static int armada8k_add_pcie_port(struct pcie_port *pp,
+				  struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	pp->root_bus_nr = -1;
+	pp->ops = &armada8k_pcie_host_ops;
+	/* platform_get_irq() reports failure as a negative errno, not 0 */
+	pp->irq = platform_get_irq(pdev, 0);
+	if (pp->irq <= 0) {
+		dev_err(dev, "failed to get irq for port\n");
+		return pp->irq ? pp->irq : -ENODEV;
+	}
+
+	ret = devm_request_irq(dev, pp->irq, armada8k_pcie_irq_handler,
+			       IRQF_SHARED, "armada8k-pcie", pp);
+	if (ret) {
+		dev_err(dev, "failed to request irq %d\n", pp->irq);
+		return ret;
+	}
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int armada8k_pcie_probe(struct platform_device *pdev)
+{
+	struct armada8k_pcie *pcie;
+	struct pcie_port *pp;
+	struct device *dev = &pdev->dev;
+	struct resource *base;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pcie->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pcie->clk))
+		return PTR_ERR(pcie->clk);
+
+	ret = clk_prepare_enable(pcie->clk);
+	if (ret)
+		return ret;
+
+	pp = &pcie->pp;
+	pp->dev = dev;
+	platform_set_drvdata(pdev, pcie);
+
+	/* Get the dw-pcie unit configuration/control registers base. */
+	base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl");
+	pp->dbi_base = devm_ioremap_resource(dev, base);
+	if (IS_ERR(pp->dbi_base)) {
+		dev_err(dev, "couldn't remap regs base %pR\n", base);
+		ret = PTR_ERR(pp->dbi_base);
+		goto fail;
+	}
+
+	pcie->base = pp->dbi_base + PCIE_VENDOR_REGS_OFFSET;
+
+	ret = armada8k_add_pcie_port(pp, pdev);
+	if (ret)
+		goto fail;
+
+	return 0;
+
+fail:
+	clk_disable_unprepare(pcie->clk);
+	return ret;
+}
+
+static const struct of_device_id armada8k_pcie_of_match[] = {
+	{ .compatible = "marvell,armada8k-pcie", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, armada8k_pcie_of_match);
+
+static struct platform_driver armada8k_pcie_driver = {
+	.probe		= armada8k_pcie_probe,
+	.driver = {
+		.name	= "armada8k-pcie",
+		.of_match_table = of_match_ptr(armada8k_pcie_of_match),
+	},
+};
+
+module_platform_driver(armada8k_pcie_driver);
+
+MODULE_DESCRIPTION("Armada 8k PCIe host controller driver");
+MODULE_AUTHOR("Yehuda Yitshak <yehuday@marvell.com>");
+MODULE_AUTHOR("Shadi Ammouri <shadi@marvell.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index a4cccd3..aafd766 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -434,7 +434,6 @@
 	struct platform_device *pdev = to_platform_device(pp->dev);
 	struct pci_bus *bus, *child;
 	struct resource *cfg_res;
-	u32 val;
 	int i, ret;
 	LIST_HEAD(res);
 	struct resource_entry *win;
@@ -544,25 +543,6 @@
 	if (pp->ops->host_init)
 		pp->ops->host_init(pp);
 
-	/*
-	 * If the platform provides ->rd_other_conf, it means the platform
-	 * uses its own address translation component rather than ATU, so
-	 * we should not program the ATU here.
-	 */
-	if (!pp->ops->rd_other_conf)
-		dw_pcie_prog_outbound_atu(pp, PCIE_ATU_REGION_INDEX1,
-					  PCIE_ATU_TYPE_MEM, pp->mem_base,
-					  pp->mem_bus_addr, pp->mem_size);
-
-	dw_pcie_wr_own_conf(pp, PCI_BASE_ADDRESS_0, 4, 0);
-
-	/* program correct class for RC */
-	dw_pcie_wr_own_conf(pp, PCI_CLASS_DEVICE, 2, PCI_CLASS_BRIDGE_PCI);
-
-	dw_pcie_rd_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, &val);
-	val |= PORT_LOGIC_SPEED_CHANGE;
-	dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val);
-
 	pp->root_bus_nr = pp->busn->start;
 	if (IS_ENABLED(CONFIG_PCI_MSI)) {
 		bus = pci_scan_root_bus_msi(pp->dev, pp->root_bus_nr,
@@ -728,8 +708,6 @@
 void dw_pcie_setup_rc(struct pcie_port *pp)
 {
 	u32 val;
-	u32 membase;
-	u32 memlimit;
 
 	/* set the number of lanes */
 	dw_pcie_readl_rc(pp, PCIE_PORT_LINK_CONTROL, &val);
@@ -788,18 +766,31 @@
 	val |= 0x00010100;
 	dw_pcie_writel_rc(pp, val, PCI_PRIMARY_BUS);
 
-	/* setup memory base, memory limit */
-	membase = ((u32)pp->mem_base & 0xfff00000) >> 16;
-	memlimit = (pp->mem_size + (u32)pp->mem_base) & 0xfff00000;
-	val = memlimit | membase;
-	dw_pcie_writel_rc(pp, val, PCI_MEMORY_BASE);
-
 	/* setup command register */
 	dw_pcie_readl_rc(pp, PCI_COMMAND, &val);
 	val &= 0xffff0000;
 	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
 		PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
 	dw_pcie_writel_rc(pp, val, PCI_COMMAND);
+
+	/*
+	 * If the platform provides ->rd_other_conf, it means the platform
+	 * uses its own address translation component rather than ATU, so
+	 * we should not program the ATU here.
+	 */
+	if (!pp->ops->rd_other_conf)
+		dw_pcie_prog_outbound_atu(pp, PCIE_ATU_REGION_INDEX1,
+					  PCIE_ATU_TYPE_MEM, pp->mem_base,
+					  pp->mem_bus_addr, pp->mem_size);
+
+	dw_pcie_wr_own_conf(pp, PCI_BASE_ADDRESS_0, 4, 0);
+
+	/* program correct class for RC */
+	dw_pcie_wr_own_conf(pp, PCI_CLASS_DEVICE, 2, PCI_CLASS_BRIDGE_PCI);
+
+	dw_pcie_rd_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, &val);
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val);
 }
 
 MODULE_AUTHOR("Jingoo Han <jg1.han@samsung.com>");
diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index 5139e64..3479d30 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -819,7 +819,7 @@
 
 	err = nwl_pcie_bridge_init(pcie);
 	if (err) {
-		dev_err(pcie->dev, "HW Initalization failed\n");
+		dev_err(pcie->dev, "HW Initialization failed\n");
 		return err;
 	}
 
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 2f6d3a1..f6221d7 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -138,6 +138,8 @@
 	char *table;
 
 	size = ibm_get_table_from_acpi(&table);
+	if (size < 0)
+		return NULL;
 	des = (union apci_descriptor *)table;
 	if (memcmp(des->header.sig, "aPCI", 4) != 0)
 		goto ibm_slot_done;
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 342b691..d319a9c 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1008,6 +1008,9 @@
 	if (i >= PCI_ROM_RESOURCE)
 		return -ENODEV;
 
+	if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
+		return -EINVAL;
+
 	if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) {
 		WARN(1, "process \"%s\" tried to map 0x%08lx bytes at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n",
 			current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff,
@@ -1024,10 +1027,6 @@
 	pci_resource_to_user(pdev, i, res, &start, &end);
 	vma->vm_pgoff += start >> PAGE_SHIFT;
 	mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
-
-	if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(start))
-		return -EINVAL;
-
 	return pci_mmap_page_range(pdev, vma, mmap_type, write_combine);
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 25e0327..c8b4dbd 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2228,7 +2228,7 @@
 
 static unsigned long pci_ea_flags(struct pci_dev *dev, u8 prop)
 {
-	unsigned long flags = IORESOURCE_PCI_FIXED;
+	unsigned long flags = IORESOURCE_PCI_FIXED | IORESOURCE_PCI_EA_BEI;
 
 	switch (prop) {
 	case PCI_EA_P_MEM:
@@ -2389,7 +2389,7 @@
 	return offset + ent_size;
 }
 
-/* Enhanced Allocation Initalization */
+/* Enhanced Allocation Initialization */
 void pci_ea_init(struct pci_dev *dev)
 {
 	int ea;
@@ -2547,7 +2547,7 @@
  * pci_std_enable_acs - enable ACS on devices using standard ACS capabilites
  * @dev: the PCI device
  */
-static int pci_std_enable_acs(struct pci_dev *dev)
+static void pci_std_enable_acs(struct pci_dev *dev)
 {
 	int pos;
 	u16 cap;
@@ -2555,7 +2555,7 @@
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
 	if (!pos)
-		return -ENODEV;
+		return;
 
 	pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap);
 	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
@@ -2573,8 +2573,6 @@
 	ctrl |= (cap & PCI_ACS_UF);
 
 	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
-
-	return 0;
 }
 
 /**
@@ -2586,10 +2584,10 @@
 	if (!pci_acs_enable)
 		return;
 
-	if (!pci_std_enable_acs(dev))
+	if (!pci_dev_specific_enable_acs(dev))
 		return;
 
-	pci_dev_specific_enable_acs(dev);
+	pci_std_enable_acs(dev);
 }
 
 static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags)
@@ -3021,6 +3019,121 @@
 }
 EXPORT_SYMBOL(pci_request_regions_exclusive);
 
+#ifdef PCI_IOBASE
+struct io_range {
+	struct list_head list;
+	phys_addr_t start;
+	resource_size_t size;
+};
+
+static LIST_HEAD(io_range_list);
+static DEFINE_SPINLOCK(io_range_lock);
+#endif
+
+/*
+ * Record the PCI IO range (expressed as CPU physical address + size).
+ * Return a negative value if an error has occurred, zero otherwise.
+ */
+int __weak pci_register_io_range(phys_addr_t addr, resource_size_t size)
+{
+	int err = 0;
+
+#ifdef PCI_IOBASE
+	struct io_range *range;
+	resource_size_t allocated_size = 0;
+
+	/* check if the range hasn't been previously recorded */
+	spin_lock(&io_range_lock);
+	list_for_each_entry(range, &io_range_list, list) {
+		/* already covered by a registered range, bail out */
+		if (addr >= range->start &&
+		    addr + size <= range->start + range->size)
+			goto end_register;
+		allocated_size += range->size;
+	}
+
+	/* range not registered yet, check for available space */
+	if (allocated_size + size - 1 > IO_SPACE_LIMIT) {
+		/* if it's too big check if 64K space can be reserved */
+		if (allocated_size + SZ_64K - 1 > IO_SPACE_LIMIT) {
+			err = -E2BIG;
+			goto end_register;
+		}
+
+		size = SZ_64K;
+		pr_warn("Requested IO range too big, new size set to 64K\n");
+	}
+
+	/* add the range to the list */
+	range = kzalloc(sizeof(*range), GFP_ATOMIC);
+	if (!range) {
+		err = -ENOMEM;
+		goto end_register;
+	}
+
+	range->start = addr;
+	range->size = size;
+
+	list_add_tail(&range->list, &io_range_list);
+
+end_register:
+	spin_unlock(&io_range_lock);
+#endif
+
+	return err;
+}
+
+phys_addr_t pci_pio_to_address(unsigned long pio)
+{
+	phys_addr_t address = (phys_addr_t)OF_BAD_ADDR;
+
+#ifdef PCI_IOBASE
+	struct io_range *range;
+	resource_size_t allocated_size = 0;
+
+	if (pio > IO_SPACE_LIMIT)
+		return address;
+
+	spin_lock(&io_range_lock);
+	list_for_each_entry(range, &io_range_list, list) {
+		if (pio >= allocated_size && pio < allocated_size + range->size) {
+			address = range->start + pio - allocated_size;
+			break;
+		}
+		allocated_size += range->size;
+	}
+	spin_unlock(&io_range_lock);
+#endif
+
+	return address;
+}
+
+unsigned long __weak pci_address_to_pio(phys_addr_t address)
+{
+#ifdef PCI_IOBASE
+	struct io_range *res;
+	resource_size_t offset = 0;
+	unsigned long addr = -1;
+
+	spin_lock(&io_range_lock);
+	list_for_each_entry(res, &io_range_list, list) {
+		if (address >= res->start && address < res->start + res->size) {
+			addr = address - res->start + offset;
+			break;
+		}
+		offset += res->size;
+	}
+	spin_unlock(&io_range_lock);
+
+	return addr;
+#else
+	if (address > IO_SPACE_LIMIT)
+		return (unsigned long)-1;
+
+	return (unsigned long) address;
+#endif
+}
+
 /**
  *	pci_remap_iospace - Remap the memory mapped I/O space
  *	@res: Resource describing the I/O space
@@ -4578,6 +4691,37 @@
 	return 0;
 }
 
+/**
+ * pci_add_dma_alias - Add a DMA devfn alias for a device
+ * @dev: the PCI device for which alias is added
+ * @devfn: alias slot and function
+ *
+ * This helper encodes 8-bit devfn as bit number in dma_alias_mask.
+ * It should be called early, preferably as PCI fixup header quirk.
+ */
+void pci_add_dma_alias(struct pci_dev *dev, u8 devfn)
+{
+	if (!dev->dma_alias_mask)
+		dev->dma_alias_mask = kcalloc(BITS_TO_LONGS(U8_MAX + 1),
+					      sizeof(long), GFP_KERNEL);
+	if (!dev->dma_alias_mask) {
+		dev_warn(&dev->dev, "Unable to allocate DMA alias mask\n");
+		return;
+	}
+
+	set_bit(devfn, dev->dma_alias_mask);	/* devfn spans 0..U8_MAX */
+	dev_info(&dev->dev, "Enabling fixed DMA alias to %02x.%d\n",
+		 PCI_SLOT(devfn), PCI_FUNC(devfn));
+}
+
+bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2)
+{
+	return (dev1->dma_alias_mask &&
+		test_bit(dev2->devfn, dev1->dma_alias_mask)) ||
+	       (dev2->dma_alias_mask &&
+		test_bit(dev1->devfn, dev2->dma_alias_mask));
+}
+
 bool pci_device_is_present(struct pci_dev *pdev)
 {
 	u32 v;
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 72db7f4..22ca641 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -81,3 +81,17 @@
 config PCIE_PME
 	def_bool y
 	depends on PCIEPORTBUS && PM
+
+config PCIE_DPC
+	tristate "PCIe Downstream Port Containment support"
+	depends on PCIEPORTBUS
+	default n
+	help
+	  This enables PCI Express Downstream Port Containment (DPC)
+	  driver support.  DPC events from Root and Downstream ports
+	  will be handled by the DPC driver.  If your system doesn't
+	  have this capability or you do not want to use this feature,
+	  it is safe to answer N.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pcie-dpc.
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 00c62df..b24525b 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -14,3 +14,5 @@
 obj-$(CONFIG_PCIEAER)		+= aer/
 
 obj-$(CONFIG_PCIE_PME) += pme.o
+
+obj-$(CONFIG_PCIE_DPC) += pcie-dpc.o
diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
new file mode 100644
index 0000000..ab552f1
--- /dev/null
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -0,0 +1,163 @@
+/*
+ * PCI Express Downstream Port Containment services driver
+ * Copyright (C) 2016 Intel Corp.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pcieport_if.h>
+
+struct dpc_dev {
+	struct pcie_device	*dev;
+	struct work_struct 	work;
+	int 			cap_pos;
+};
+
+static void dpc_wait_link_inactive(struct pci_dev *pdev)
+{
+	unsigned long timeout = jiffies + HZ;	/* poll for at most 1 second */
+	u16 lnk_status;
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+	while (lnk_status & PCI_EXP_LNKSTA_DLLLA &&
+					!time_after(jiffies, timeout)) {
+		msleep(10);
+		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+	}
+	if (lnk_status & PCI_EXP_LNKSTA_DLLLA)
+		dev_warn(&pdev->dev, "Link state not disabled for DPC event\n");
+}
+
+static void interrupt_event_handler(struct work_struct *work)
+{
+	struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
+	struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
+	struct pci_bus *parent = pdev->subordinate;
+
+	pci_lock_rescan_remove();
+	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
+					 bus_list) {
+		pci_dev_get(dev);
+		pci_stop_and_remove_bus_device(dev);
+		pci_dev_put(dev);
+	}
+	pci_unlock_rescan_remove();
+
+	dpc_wait_link_inactive(pdev);
+	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
+		PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
+}
+
+static irqreturn_t dpc_irq(int irq, void *context)
+{
+	struct dpc_dev *dpc = (struct dpc_dev *)context;
+	struct pci_dev *pdev = dpc->dev->port;
+	u16 status, source;
+
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status);
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_SOURCE_ID,
+			     &source);
+	if (!status)
+		return IRQ_NONE;
+
+	dev_info(&dpc->dev->device, "DPC containment event, status:%#06x source:%#06x\n",
+		status, source);
+
+	if (status & PCI_EXP_DPC_STATUS_TRIGGER) {
+		u16 reason = (status >> 1) & 0x3;
+
+		dev_warn(&dpc->dev->device, "DPC %s triggered, remove downstream devices\n",
+			 (reason == 0) ? "unmasked uncorrectable error" :
+			 (reason == 1) ? "ERR_NONFATAL" :
+			 (reason == 2) ? "ERR_FATAL" : "extended error");
+		schedule_work(&dpc->work);
+	}
+	return IRQ_HANDLED;
+}
+
+#define FLAG(x, y) (((x) & (y)) ? '+' : '-')
+static int dpc_probe(struct pcie_device *dev)
+{
+	struct dpc_dev *dpc;
+	struct pci_dev *pdev = dev->port;
+	int status;
+	u16 ctl, cap;
+
+	dpc = kzalloc(sizeof(*dpc), GFP_KERNEL);
+	if (!dpc)
+		return -ENOMEM;
+
+	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
+	dpc->dev = dev;
+	INIT_WORK(&dpc->work, interrupt_event_handler);
+	set_service_data(dev, dpc);
+
+	status = request_irq(dev->irq, dpc_irq, IRQF_SHARED, "pcie-dpc", dpc);
+	if (status) {
+		dev_warn(&dev->device, "request IRQ%d failed: %d\n", dev->irq,
+			 status);
+		goto out;
+	}
+
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
+
+	ctl |= PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN;
+	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
+
+	dev_info(&dev->device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
+		cap & 0xf, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
+		FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
+		FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), (cap >> 8) & 0xf,
+		FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE));
+	return status;
+ out:
+	kfree(dpc);
+	return status;
+}
+
+static void dpc_remove(struct pcie_device *dev)
+{
+	struct dpc_dev *dpc = get_service_data(dev);
+	struct pci_dev *pdev = dev->port;
+	u16 ctl;
+
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
+	ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN);
+	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
+
+	free_irq(dev->irq, dpc);
+	kfree(dpc);
+}
+
+static struct pcie_port_service_driver dpcdriver = {
+	.name		= "dpc",
+	.port_type	= PCIE_ANY_PORT, /* not a bitmask: types can't be ORed */
+	.service	= PCIE_PORT_SERVICE_DPC,
+	.probe		= dpc_probe,
+	.remove		= dpc_remove,
+};
+
+static int __init dpc_service_init(void)
+{
+	return pcie_port_service_register(&dpcdriver);
+}
+
+static void __exit dpc_service_exit(void)
+{
+	pcie_port_service_unregister(&dpcdriver);
+}
+
+MODULE_DESCRIPTION("PCI Express Downstream Port Containment driver");
+MODULE_AUTHOR("Keith Busch <keith.busch@intel.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.1");
+
+module_init(dpc_service_init);
+module_exit(dpc_service_exit);
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index d525548..587aef3 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -11,14 +11,14 @@
 
 #include <linux/compiler.h>
 
-#define PCIE_PORT_DEVICE_MAXSERVICES   4
+#define PCIE_PORT_DEVICE_MAXSERVICES   5
 /*
  * According to the PCI Express Base Specification 2.0, the indices of
  * the MSI-X table entries used by port services must not exceed 31
  */
 #define PCIE_PORT_MAX_MSIX_ENTRIES	32
 
-#define get_descriptor_id(type, service) (((type - 4) << 4) | service)
+#define get_descriptor_id(type, service) (((type - 4) << 8) | service)
 
 extern struct bus_type pcie_port_bus_type;
 int pcie_port_device_register(struct pci_dev *dev);
@@ -67,17 +67,14 @@
 #endif /* !CONFIG_PCIE_PME */
 
 #ifdef CONFIG_ACPI
-int pcie_port_acpi_setup(struct pci_dev *port, int *mask);
+void pcie_port_acpi_setup(struct pci_dev *port, int *mask);
 
-static inline int pcie_port_platform_notify(struct pci_dev *port, int *mask)
+static inline void pcie_port_platform_notify(struct pci_dev *port, int *mask)
 {
-	return pcie_port_acpi_setup(port, mask);
+	pcie_port_acpi_setup(port, mask);
 }
 #else /* !CONFIG_ACPI */
-static inline int pcie_port_platform_notify(struct pci_dev *port, int *mask)
-{
-	return 0;
-}
+static inline void pcie_port_platform_notify(struct pci_dev *port, int *mask){}
 #endif /* !CONFIG_ACPI */
 
 #endif /* _PORTDRV_H_ */
diff --git a/drivers/pci/pcie/portdrv_acpi.c b/drivers/pci/pcie/portdrv_acpi.c
index b4d2894..6b8c2f1 100644
--- a/drivers/pci/pcie/portdrv_acpi.c
+++ b/drivers/pci/pcie/portdrv_acpi.c
@@ -32,32 +32,30 @@
  * NOTE: It turns out that we cannot do that for individual port services
  * separately, because that would make some systems work incorrectly.
  */
-int pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask)
+void pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask)
 {
 	struct acpi_pci_root *root;
 	acpi_handle handle;
 	u32 flags;
 
 	if (acpi_pci_disabled)
-		return 0;
+		return;
 
 	handle = acpi_find_root_bridge_handle(port);
 	if (!handle)
-		return -EINVAL;
+		return;
 
 	root = acpi_pci_find_root(handle);
 	if (!root)
-		return -ENODEV;
+		return;
 
 	flags = root->osc_control_set;
 
-	*srv_mask = PCIE_PORT_SERVICE_VC;
+	*srv_mask = PCIE_PORT_SERVICE_VC | PCIE_PORT_SERVICE_DPC;
 	if (flags & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL)
 		*srv_mask |= PCIE_PORT_SERVICE_HP;
 	if (flags & OSC_PCI_EXPRESS_PME_CONTROL)
 		*srv_mask |= PCIE_PORT_SERVICE_PME;
 	if (flags & OSC_PCI_EXPRESS_AER_CONTROL)
 		*srv_mask |= PCIE_PORT_SERVICE_AER;
-
-	return 0;
 }
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 88122dc..32d4d0a 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -254,38 +254,28 @@
 static int get_port_device_capability(struct pci_dev *dev)
 {
 	int services = 0;
-	u32 reg32;
 	int cap_mask = 0;
-	int err;
 
 	if (pcie_ports_disabled)
 		return 0;
 
 	cap_mask = PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP
-			| PCIE_PORT_SERVICE_VC;
+			| PCIE_PORT_SERVICE_VC | PCIE_PORT_SERVICE_DPC;
 	if (pci_aer_available())
 		cap_mask |= PCIE_PORT_SERVICE_AER;
 
-	if (pcie_ports_auto) {
-		err = pcie_port_platform_notify(dev, &cap_mask);
-		if (err)
-			return 0;
-	}
+	if (pcie_ports_auto)
+		pcie_port_platform_notify(dev, &cap_mask);
 
 	/* Hot-Plug Capable */
-	if ((cap_mask & PCIE_PORT_SERVICE_HP) &&
-	    pcie_caps_reg(dev) & PCI_EXP_FLAGS_SLOT) {
-		pcie_capability_read_dword(dev, PCI_EXP_SLTCAP, &reg32);
-		if (reg32 & PCI_EXP_SLTCAP_HPC) {
-			services |= PCIE_PORT_SERVICE_HP;
-			/*
-			 * Disable hot-plug interrupts in case they have been
-			 * enabled by the BIOS and the hot-plug service driver
-			 * is not loaded.
-			 */
-			pcie_capability_clear_word(dev, PCI_EXP_SLTCTL,
-				PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE);
-		}
+	if ((cap_mask & PCIE_PORT_SERVICE_HP) && dev->is_hotplug_bridge) {
+		services |= PCIE_PORT_SERVICE_HP;
+		/*
+		 * Disable hot-plug interrupts in case they have been enabled
+		 * by the BIOS and the hot-plug service driver is not loaded.
+		 */
+		pcie_capability_clear_word(dev, PCI_EXP_SLTCTL,
+			  PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE);
 	}
 	/* AER capable */
 	if ((cap_mask & PCIE_PORT_SERVICE_AER)
@@ -311,6 +301,8 @@
 		 */
 		pcie_pme_interrupt_enable(dev, false);
 	}
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC))
+		services |= PCIE_PORT_SERVICE_DPC;
 
 	return services;
 }
@@ -338,7 +330,7 @@
 	device = &pcie->device;
 	device->bus = &pcie_port_bus_type;
 	device->release = release_pcie_device;	/* callback to free pcie dev */
-	dev_set_name(device, "%s:pcie%02x",
+	dev_set_name(device, "%s:pcie%03x",
 		     pci_name(pdev),
 		     get_descriptor_id(pci_pcie_type(pdev), service));
 	device->parent = &pdev->dev;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8004f67..8e3ef72 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -179,9 +179,6 @@
 	u16 orig_cmd;
 	struct pci_bus_region region, inverted_region;
 
-	if (dev->non_compliant_bars)
-		return 0;
-
 	mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
 
 	/* No printks while decoding is disabled! */
@@ -322,6 +319,9 @@
 {
 	unsigned int pos, reg;
 
+	if (dev->non_compliant_bars)
+		return;
+
 	for (pos = 0; pos < howmany; pos++) {
 		struct resource *res = &dev->resource[pos];
 		reg = PCI_BASE_ADDRESS_0 + (pos << 2);
@@ -1537,6 +1537,7 @@
 	pcibios_release_device(pci_dev);
 	pci_bus_put(pci_dev->bus);
 	kfree(pci_dev->driver_override);
+	kfree(pci_dev->dma_alias_mask);
 	kfree(pci_dev);
 }
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8e67802..ee72ebe 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3150,6 +3150,39 @@
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
 			 quirk_broken_intx_masking);
 
+/*
+ * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking:
+ * DisINTx can be set, but the interrupt status bit is non-functional.
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1572,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1574,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1580,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1581,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1583,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1584,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1585,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1586,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1587,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1588,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1589,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d0,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d1,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d2,
+			 quirk_broken_intx_masking);
+
 static void quirk_no_bus_reset(struct pci_dev *dev)
 {
 	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
@@ -3185,6 +3218,29 @@
 DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
 			       PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset);
 
+/*
+ * Thunderbolt controllers with broken MSI hotplug signaling:
+ * Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part
+ * of the 2nd generation (Cactus Ridge 4C up to revision 1, Port Ridge).
+ */
+static void quirk_thunderbolt_hotplug_msi(struct pci_dev *pdev)
+{
+	if (pdev->is_hotplug_bridge &&	/* quirk applies to hotplug ports only */
+	    !(pdev->device == PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C &&
+	      pdev->revision >= 2))	/* Cactus Ridge 4C: revisions 0-1 only */
+		pdev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LIGHT_RIDGE,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EAGLE_RIDGE,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LIGHT_PEAK,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE,
+			quirk_thunderbolt_hotplug_msi);
+
 #ifdef CONFIG_ACPI
 /*
  * Apple: Shutdown Cactus Ridge Thunderbolt controller.
@@ -3232,7 +3288,8 @@
 	acpi_execute_simple_method(SXIO, NULL, 0);
 	acpi_execute_simple_method(SXLV, NULL, 0);
 }
-DECLARE_PCI_FIXUP_SUSPEND_LATE(PCI_VENDOR_ID_INTEL, 0x1547,
+DECLARE_PCI_FIXUP_SUSPEND_LATE(PCI_VENDOR_ID_INTEL,
+			       PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
 			       quirk_apple_poweroff_thunderbolt);
 
 /*
@@ -3266,9 +3323,11 @@
 	if (!nhi)
 		goto out;
 	if (nhi->vendor != PCI_VENDOR_ID_INTEL
-			|| (nhi->device != 0x1547 && nhi->device != 0x156c)
-			|| nhi->subsystem_vendor != 0x2222
-			|| nhi->subsystem_device != 0x1111)
+		    || (nhi->device != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE &&
+			nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C &&
+			nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI)
+		    || nhi->subsystem_vendor != 0x2222
+		    || nhi->subsystem_device != 0x1111)
 		goto out;
 	dev_info(&dev->dev, "quirk: waiting for thunderbolt to reestablish PCI tunnels...\n");
 	device_pm_wait_for_dev(&dev->dev, &nhi->dev);
@@ -3276,9 +3335,14 @@
 	pci_dev_put(nhi);
 	pci_dev_put(sibling);
 }
-DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, 0x1547,
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
+			       PCI_DEVICE_ID_INTEL_LIGHT_RIDGE,
 			       quirk_apple_wait_for_thunderbolt);
-DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, 0x156d,
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
+			       PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
+			       quirk_apple_wait_for_thunderbolt);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
+			       PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE,
 			       quirk_apple_wait_for_thunderbolt);
 #endif
 
@@ -3610,10 +3674,8 @@
 
 static void quirk_dma_func0_alias(struct pci_dev *dev)
 {
-	if (PCI_FUNC(dev->devfn) != 0) {
-		dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
-		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
-	}
+	if (PCI_FUNC(dev->devfn) != 0)
+		pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
 }
 
 /*
@@ -3626,10 +3688,8 @@
 
 static void quirk_dma_func1_alias(struct pci_dev *dev)
 {
-	if (PCI_FUNC(dev->devfn) != 1) {
-		dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 1);
-		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
-	}
+	if (PCI_FUNC(dev->devfn) != 1)
+		pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 1));
 }
 
 /*
@@ -3695,13 +3755,8 @@
 	const struct pci_device_id *id;
 
 	id = pci_match_id(fixed_dma_alias_tbl, dev);
-	if (id) {
-		dev->dma_alias_devfn = id->driver_data;
-		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
-		dev_info(&dev->dev, "Enabling fixed DMA alias to %02x.%d\n",
-			 PCI_SLOT(dev->dma_alias_devfn),
-			 PCI_FUNC(dev->dma_alias_devfn));
-	}
+	if (id)
+		pci_add_dma_alias(dev, id->driver_data);
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ADAPTEC2, 0x0285, quirk_fixed_dma_alias);
@@ -3734,6 +3789,21 @@
 DECLARE_PCI_FIXUP_HEADER(0x8086, 0x244e, quirk_use_pcie_bridge_dma_alias);
 
 /*
+ * The MIC x200 NTB forwards PCIe traffic using multiple alien RIDs. They have
+ * to be added as aliases to the DMA device in order to allow buffer access
+ * when the IOMMU is enabled. The following devfns have to match the RIT-LUT
+ * table programmed in the EEPROM.
+ */
+static void quirk_mic_x200_dma_alias(struct pci_dev *pdev)
+{
+	pci_add_dma_alias(pdev, PCI_DEVFN(0x10, 0x0));	/* slot 0x10, func 0 */
+	pci_add_dma_alias(pdev, PCI_DEVFN(0x11, 0x0));	/* slot 0x11, func 0 */
+	pci_add_dma_alias(pdev, PCI_DEVFN(0x12, 0x3));	/* slot 0x12, func 3 */
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2260, quirk_mic_x200_dma_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2264, quirk_mic_x200_dma_alias);
+
+/*
  * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero)
  * class code.  Fix it.
  */
@@ -3936,6 +4006,55 @@
 	return acs_flags & ~flags ? 0 : 1;
 }
 
+/*
+ * Sunrise Point PCH root ports implement ACS, but unfortunately as shown in
+ * the datasheet (Intel 100 Series Chipset Family PCH Datasheet, Vol. 2,
+ * 12.1.46, 12.1.47)[1] this chipset uses dwords for the ACS capability and
+ * control registers whereas the PCIe spec packs them into words (Rev 3.0,
+ * 7.16 ACS Extended Capability).  The bit definitions are correct, but the
+ * control register is at offset 8 instead of 6 and we should probably use
+ * dword accesses to them.  This applies to the following PCI Device IDs, as
+ * found in volume 1 of the datasheet[2]:
+ *
+ * 0xa110-0xa11f Sunrise Point-H PCI Express Root Port #{0-16}
+ * 0xa167-0xa16a Sunrise Point-H PCI Express Root Port #{17-20}
+ *
+ * N.B. This doesn't fix what lspci shows.
+ *
+ * [1] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-2.html
+ * [2] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-1.html
+ */
+static bool pci_quirk_intel_spt_pch_acs_match(struct pci_dev *dev)
+{
+	if (!pci_is_pcie(dev) || pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+		return false;
+	return (dev->device >= 0xa110 && dev->device <= 0xa11f) ||
+	       (dev->device >= 0xa167 && dev->device <= 0xa16a);
+}
+
+#define INTEL_SPT_ACS_CTRL (PCI_ACS_CAP + 4)
+
+static int pci_quirk_intel_spt_pch_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	u32 acs_cap, acs_ctrl;
+	int acs_pos;
+
+	/* -ENOTTY tells the caller that this quirk does not apply */
+	if (!pci_quirk_intel_spt_pch_acs_match(dev))
+		return -ENOTTY;
+	acs_pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+	if (acs_pos == 0)
+		return -ENOTTY;
+
+	/* see pci_acs_flags_enabled(); here both registers are dwords */
+	pci_read_config_dword(dev, acs_pos + PCI_ACS_CAP, &acs_cap);
+	acs_flags &= (acs_cap | PCI_ACS_EC);
+	pci_read_config_dword(dev, acs_pos + INTEL_SPT_ACS_CTRL, &acs_ctrl);
+
+	/* 1 if every remaining requested flag is set in control, else 0 */
+	return (acs_flags & ~acs_ctrl) == 0;
+}
+
 static int pci_quirk_mf_endpoint_acs(struct pci_dev *dev, u16 acs_flags)
 {
 	/*
@@ -4024,6 +4143,7 @@
 	{ PCI_VENDOR_ID_INTEL, 0x15b8, pci_quirk_mf_endpoint_acs },
 	/* Intel PCH root ports */
 	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_pch_acs },
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_spt_pch_acs },
 	{ 0x19a2, 0x710, pci_quirk_mf_endpoint_acs }, /* Emulex BE3-R */
 	{ 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */
 	/* Cavium ThunderX */
@@ -4159,16 +4279,44 @@
 	return 0;
 }
 
+static int pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev)
+{
+	/* ACS features to enable, each only if the port advertises it */
+	const u32 wanted = PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF;
+	u32 acs_cap, acs_ctrl;
+	int acs_pos;
+
+	if (!pci_quirk_intel_spt_pch_acs_match(dev))
+		return -ENOTTY;
+
+	acs_pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+	if (acs_pos == 0)
+		return -ENOTTY;
+
+	/* Both registers are accessed as dwords on these root ports */
+	pci_read_config_dword(dev, acs_pos + PCI_ACS_CAP, &acs_cap);
+	pci_read_config_dword(dev, acs_pos + INTEL_SPT_ACS_CTRL, &acs_ctrl);
+
+	/* Only add bits; whatever is already set in the control dword stays */
+	acs_ctrl |= acs_cap & wanted;
+	pci_write_config_dword(dev, acs_pos + INTEL_SPT_ACS_CTRL, acs_ctrl);
+
+	dev_info(&dev->dev, "Intel SPT PCH root port ACS workaround enabled\n");
+
+	return 0;
+}
+
 static const struct pci_dev_enable_acs {
 	u16 vendor;
 	u16 device;
 	int (*enable_acs)(struct pci_dev *dev);
 } pci_dev_enable_acs[] = {
 	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_enable_intel_pch_acs },
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_enable_intel_spt_pch_acs },
 	{ 0 }
 };
 
-void pci_dev_specific_enable_acs(struct pci_dev *dev)
+int pci_dev_specific_enable_acs(struct pci_dev *dev)
 {
 	const struct pci_dev_enable_acs *i;
 	int ret;
@@ -4180,9 +4328,11 @@
 		     i->device == (u16)PCI_ANY_ID)) {
 			ret = i->enable_acs(dev);
 			if (ret >= 0)
-				return;
+				return ret;
 		}
 	}
+
+	return -ENOTTY;
 }
 
 /*
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index a20ce7d..33e0f03 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -40,11 +40,15 @@
 	 * If the device is broken and uses an alias requester ID for
 	 * DMA, iterate over that too.
 	 */
-	if (unlikely(pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) {
-		ret = fn(pdev, PCI_DEVID(pdev->bus->number,
-					 pdev->dma_alias_devfn), data);
-		if (ret)
-			return ret;
+	if (unlikely(pdev->dma_alias_mask)) {
+		unsigned int devfn;	/* 256 devfns scanned; u8 can't hold 256 */
+
+		for_each_set_bit(devfn, pdev->dma_alias_mask, U8_MAX + 1) {
+			ret = fn(pdev, PCI_DEVID(pdev->bus->number, devfn),
+				 data);
+			if (ret)
+				return ret;
+		}
 	}
 
 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
diff --git a/drivers/pinctrl/bcm/Kconfig b/drivers/pinctrl/bcm/Kconfig
index 2cc7438..c356223 100644
--- a/drivers/pinctrl/bcm/Kconfig
+++ b/drivers/pinctrl/bcm/Kconfig
@@ -86,3 +86,16 @@
 	  The ChipcommonA GPIO controller support basic PINCONF functions such
 	  as bias pull up, pull down, and drive strength configurations, when
 	  these pins are muxed to GPIO.
+
+config PINCTRL_NS2_MUX
+	bool "Broadcom Northstar2 pinmux driver"
+	depends on OF
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	select PINMUX
+	select GENERIC_PINCONF
+	default ARM64 && ARCH_BCM_IPROC
+	help
+	  Say yes here to enable the Broadcom NS2 MUX driver.
+
+	  The Broadcom Northstar2 IOMUX driver supports group based IOMUX
+	  configuration.
diff --git a/drivers/pinctrl/bcm/Makefile b/drivers/pinctrl/bcm/Makefile
index 6148367d..3861a1c 100644
--- a/drivers/pinctrl/bcm/Makefile
+++ b/drivers/pinctrl/bcm/Makefile
@@ -5,3 +5,4 @@
 obj-$(CONFIG_PINCTRL_IPROC_GPIO)	+= pinctrl-iproc-gpio.o
 obj-$(CONFIG_PINCTRL_CYGNUS_MUX)	+= pinctrl-cygnus-mux.o
 obj-$(CONFIG_PINCTRL_NSP_GPIO)		+= pinctrl-nsp-gpio.o
+obj-$(CONFIG_PINCTRL_NS2_MUX)		+= pinctrl-ns2-mux.o
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
index c3c692e..582f6df 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
@@ -1024,7 +1024,7 @@
 	.get_group_pins = bcm281xx_pinctrl_get_group_pins,
 	.pin_dbg_show = bcm281xx_pinctrl_pin_dbg_show,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int bcm281xx_pinctrl_get_fcns_count(struct pinctrl_dev *pctldev)
@@ -1422,9 +1422,7 @@
 	bcm281xx_pinctrl_desc.pins = bcm281xx_pinctrl.pins;
 	bcm281xx_pinctrl_desc.npins = bcm281xx_pinctrl.npins;
 
-	pctl = pinctrl_register(&bcm281xx_pinctrl_desc,
-				&pdev->dev,
-				pdata);
+	pctl = devm_pinctrl_register(&pdev->dev, &bcm281xx_pinctrl_desc, pdata);
 	if (IS_ERR(pctl)) {
 		dev_err(&pdev->dev, "Failed to register pinctrl\n");
 		return PTR_ERR(pctl);
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index 08b1d93..fa77165 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -342,6 +342,18 @@
 	return bcm2835_gpio_get_bit(pc, GPLEV0, offset);
 }
 
+static int bcm2835_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
+{
+	struct bcm2835_pinctrl *pinctrl = gpiochip_get_data(chip);
+	enum bcm2835_fsel mode = bcm2835_pinctrl_fsel_get(pinctrl, offset);
+
+	/* No clear direction once the pin is on an alternative function */
+	if (mode > BCM2835_FSEL_GPIO_OUT)
+		return -EINVAL;
+	/* 1 when the pin is a GPIO input, 0 otherwise */
+	return mode == BCM2835_FSEL_GPIO_IN ? 1 : 0;
+}
+
 static void bcm2835_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 {
 	struct bcm2835_pinctrl *pc = gpiochip_get_data(chip);
@@ -370,6 +382,7 @@
 	.free = gpiochip_generic_free,
 	.direction_input = bcm2835_gpio_direction_input,
 	.direction_output = bcm2835_gpio_direction_output,
+	.get_direction = bcm2835_gpio_get_direction,
 	.get = bcm2835_gpio_get,
 	.set = bcm2835_gpio_set,
 	.to_irq = bcm2835_gpio_to_irq,
@@ -1027,7 +1040,7 @@
 		return err;
 	}
 
-	pc->pctl_dev = pinctrl_register(&bcm2835_pinctrl_desc, dev, pc);
+	pc->pctl_dev = devm_pinctrl_register(dev, &bcm2835_pinctrl_desc, pc);
 	if (IS_ERR(pc->pctl_dev)) {
 		gpiochip_remove(&pc->gpio_chip);
 		return PTR_ERR(pc->pctl_dev);
@@ -1045,7 +1058,6 @@
 {
 	struct bcm2835_pinctrl *pc = platform_get_drvdata(pdev);
 
-	pinctrl_unregister(pc->pctl_dev);
 	gpiochip_remove(&pc->gpio_chip);
 
 	return 0;
diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c b/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c
index 9728f3d..d31c957 100644
--- a/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c
+++ b/drivers/pinctrl/bcm/pinctrl-cygnus-mux.c
@@ -737,7 +737,7 @@
 	.get_group_pins = cygnus_get_group_pins,
 	.pin_dbg_show = cygnus_pin_dbg_show,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_group,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int cygnus_get_functions_count(struct pinctrl_dev *pctrl_dev)
@@ -987,7 +987,7 @@
 	cygnus_pinctrl_desc.pins = pins;
 	cygnus_pinctrl_desc.npins = num_pins;
 
-	pinctrl->pctl = pinctrl_register(&cygnus_pinctrl_desc, &pdev->dev,
+	pinctrl->pctl = devm_pinctrl_register(&pdev->dev, &cygnus_pinctrl_desc,
 			pinctrl);
 	if (IS_ERR(pinctrl->pctl)) {
 		dev_err(&pdev->dev, "unable to register Cygnus IOMUX pinctrl\n");
diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
index d530ab4..3670f5e 100644
--- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
@@ -379,7 +379,7 @@
 	.get_groups_count = iproc_get_groups_count,
 	.get_group_name = iproc_get_group_name,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int iproc_gpio_set_pull(struct iproc_gpio *chip, unsigned gpio,
@@ -623,7 +623,7 @@
 	pctldesc->npins = gc->ngpio;
 	pctldesc->confops = &iproc_pconf_ops;
 
-	chip->pctl = pinctrl_register(pctldesc, chip->dev, chip);
+	chip->pctl = devm_pinctrl_register(chip->dev, pctldesc, chip);
 	if (IS_ERR(chip->pctl)) {
 		dev_err(chip->dev, "unable to register pinctrl device\n");
 		return PTR_ERR(chip->pctl);
@@ -632,11 +632,6 @@
 	return 0;
 }
 
-static void iproc_gpio_unregister_pinconf(struct iproc_gpio *chip)
-{
-	pinctrl_unregister(chip->pctl);
-}
-
 static const struct of_device_id iproc_gpio_of_match[] = {
 	{ .compatible = "brcm,cygnus-ccm-gpio" },
 	{ .compatible = "brcm,cygnus-asiu-gpio" },
@@ -720,7 +715,7 @@
 					   handle_simple_irq, IRQ_TYPE_NONE);
 		if (ret) {
 			dev_err(dev, "no GPIO irqchip\n");
-			goto err_unregister_pinconf;
+			goto err_rm_gpiochip;
 		}
 
 		gpiochip_set_chained_irqchip(gc, &iproc_gpio_irq_chip, irq,
@@ -729,9 +724,6 @@
 
 	return 0;
 
-err_unregister_pinconf:
-	iproc_gpio_unregister_pinconf(chip);
-
 err_rm_gpiochip:
 	gpiochip_remove(gc);
 
diff --git a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c
new file mode 100644
index 0000000..3fefd14
--- /dev/null
+++ b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c
@@ -0,0 +1,1117 @@
+/* Copyright (C) 2016 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This file contains the Northstar2 IOMUX driver that supports group
+ * based PINMUX configuration. The PWM is functional only when the
+ * corresponding mfio pin group is selected as gpio.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "../core.h"
+#include "../pinctrl-utils.h"
+
+#define NS2_NUM_IOMUX			19
+#define NS2_NUM_PWM_MUX			4
+
+#define NS2_PIN_MUX_BASE0		0x00
+#define NS2_PIN_MUX_BASE1		0x01
+#define NS2_PIN_CONF_BASE		0x02
+#define NS2_MUX_PAD_FUNC1_OFFSET	0x04
+
+#define NS2_PIN_SRC_MASK		0x01
+#define NS2_PIN_PULL_MASK		0x03
+#define NS2_PIN_DRIVE_STRENGTH_MASK	0x07
+
+#define NS2_PIN_PULL_UP			0x01
+#define NS2_PIN_PULL_DOWN		0x02
+
+#define NS2_PIN_INPUT_EN_MASK		0x01
+
+/*
+ * Northstar2 IOMUX register description
+ *
+ * @base: base address number (0 or 1 in ns2_pin_groups[])
+ * @offset: register offset for mux configuration of a group
+ * @shift: bit shift for mux configuration of a group
+ * @mask: mask bits, presumably unshifted (used as mask << shift) - confirm
+ * @alt: alternate function to set to
+ */
+struct ns2_mux {
+	unsigned int base;
+	unsigned int offset;
+	unsigned int shift;
+	unsigned int mask;
+	unsigned int alt;
+};
+
+/*
+ * Keep track of Northstar2 IOMUX configuration and prevent double
+ * configuration
+ *
+ * @mux: Northstar2 IOMUX register description
+ * @is_configured: flag to indicate whether a mux setting has already
+ * been configured
+ */
+struct ns2_mux_log {
+	struct ns2_mux mux;
+	bool is_configured;
+};
+
+/*
+ * Group based IOMUX configuration
+ *
+ * @name: name of the group (NS2_PIN_GROUP() appends "_grp")
+ * @pins: array of pins used by this group
+ * @num_pins: total number of pins used by this group
+ * @mux: Northstar2 group based IOMUX configuration
+ */
+struct ns2_pin_group {
+	const char *name;
+	const unsigned int *pins;
+	const unsigned int num_pins;
+	const struct ns2_mux mux;
+};
+
+/*
+ * Northstar2 mux function and supported pin groups
+ *
+ * @name: name of the function
+ * @groups: array of names of the groups that this function supports
+ * @num_groups: total number of groups that this function supports
+ */
+struct ns2_pin_function {
+	const char *name;
+	const char * const *groups;
+	const unsigned int num_groups;
+};
+
+/*
+ * Northstar2 IOMUX pinctrl core
+ *
+ * @pctl: pointer to pinctrl_dev
+ * @dev: pointer to device
+ * @base0: first IOMUX register base
+ * @base1: second IOMUX register base
+ * @pinconf_base: configuration register base
+ * @groups: pointer to array of groups (struct ns2_pin_group)
+ * @num_groups: total number of groups
+ * @functions: pointer to array of functions (struct ns2_pin_function)
+ * @num_functions: total number of functions
+ * @mux_log: pointer to the array of mux logs (struct ns2_mux_log)
+ * @lock: lock to protect register access
+ */
+struct ns2_pinctrl {
+	struct pinctrl_dev *pctl;
+	struct device *dev;
+	void __iomem *base0;
+	void __iomem *base1;
+	void __iomem *pinconf_base;
+
+	const struct ns2_pin_group *groups;
+	unsigned int num_groups;
+
+	const struct ns2_pin_function *functions;
+	unsigned int num_functions;
+
+	struct ns2_mux_log *mux_log;
+
+	spinlock_t lock;
+};
+
+/*
+ * Pin configuration info
+ *
+ * @base: base address number (ns2_pins[] uses 2, or -1 for the mfio pins)
+ * @offset: register offset from base
+ * @src_shift: slew rate control bit shift in the register
+ * @input_en: input enable control bit shift in the register
+ * @pull_shift: pull-up/pull-down control bit shift in the register
+ * @drive_shift: drive strength control bit shift in the register
+ */
+struct ns2_pinconf {
+	unsigned int base;
+	unsigned int offset;
+	unsigned int src_shift;
+	unsigned int input_en;
+	unsigned int pull_shift;
+	unsigned int drive_shift;
+};
+
+/*
+ * Description of a pin in Northstar2
+ *
+ * @pin: pin number
+ * @name: pin name (ns2_pins[] stores string literals here, hence const)
+ * @pin_conf: pin configuration structure
+ */
+struct ns2_pin {
+	unsigned int pin;
+	const char *name;
+	struct ns2_pinconf pin_conf;
+};
+
+#define NS2_PIN_DESC(num, label, cbase, coff, src, in_en, pull, drive)	\
+{	/* initializer for one struct ns2_pin */			\
+	.pin = num,							\
+	.name = label,							\
+	.pin_conf = {							\
+		.base = cbase,						\
+		.offset = coff,						\
+		.src_shift = src,					\
+		.input_en = in_en,					\
+		.pull_shift = pull,					\
+		.drive_shift = drive,					\
+	}								\
+}
+
+/* List of pins in Northstar2, one NS2_PIN_DESC(pin, name, base, offset,
+ * src_shift, input_en, pull_shift, drive_shift) entry per pin. The mfio_*
+ * pins use base -1 and all-zero fields (presumably: no pinconf register). */
+static struct ns2_pin ns2_pins[] = {
+	NS2_PIN_DESC(0, "mfio_0", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(1, "mfio_1", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(2, "mfio_2", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(3, "mfio_3", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(4, "mfio_4", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(5, "mfio_5", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(6, "mfio_6", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(7, "mfio_7", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(8, "mfio_8", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(9, "mfio_9", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(10, "mfio_10", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(11, "mfio_11", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(12, "mfio_12", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(13, "mfio_13", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(14, "mfio_14", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(15, "mfio_15", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(16, "mfio_16", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(17, "mfio_17", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(18, "mfio_18", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(19, "mfio_19", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(20, "mfio_20", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(21, "mfio_21", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(22, "mfio_22", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(23, "mfio_23", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(24, "mfio_24", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(25, "mfio_25", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(26, "mfio_26", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(27, "mfio_27", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(28, "mfio_28", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(29, "mfio_29", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(30, "mfio_30", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(31, "mfio_31", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(32, "mfio_32", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(33, "mfio_33", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(34, "mfio_34", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(35, "mfio_35", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(36, "mfio_36", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(37, "mfio_37", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(38, "mfio_38", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(39, "mfio_39", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(40, "mfio_40", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(41, "mfio_41", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(42, "mfio_42", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(43, "mfio_43", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(44, "mfio_44", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(45, "mfio_45", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(46, "mfio_46", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(47, "mfio_47", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(48, "mfio_48", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(49, "mfio_49", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(50, "mfio_50", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(51, "mfio_51", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(52, "mfio_52", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(53, "mfio_53", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(54, "mfio_54", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(55, "mfio_55", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(56, "mfio_56", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(57, "mfio_57", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(58, "mfio_58", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(59, "mfio_59", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(60, "mfio_60", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(61, "mfio_61", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(62, "mfio_62", -1, 0, 0, 0, 0, 0),
+	NS2_PIN_DESC(63, "qspi_wp", 2, 0x0, 31, 30, 27, 24),
+	NS2_PIN_DESC(64, "qspi_hold", 2, 0x0, 23, 22, 19, 16),
+	NS2_PIN_DESC(65, "qspi_cs", 2, 0x0, 15, 14, 11, 8),
+	NS2_PIN_DESC(66, "qspi_sck", 2, 0x0, 7, 6, 3, 0),
+	NS2_PIN_DESC(67, "uart3_sin", 2, 0x04, 31, 30, 27, 24),
+	NS2_PIN_DESC(68, "uart3_sout", 2, 0x04, 23, 22, 19, 16),
+	NS2_PIN_DESC(69, "qspi_mosi", 2, 0x04, 15, 14, 11, 8),
+	NS2_PIN_DESC(70, "qspi_miso", 2, 0x04, 7, 6, 3, 0),
+	NS2_PIN_DESC(71, "spi0_fss", 2, 0x08, 31, 30, 27, 24),
+	NS2_PIN_DESC(72, "spi0_rxd", 2, 0x08, 23, 22, 19, 16),
+	NS2_PIN_DESC(73, "spi0_txd", 2, 0x08, 15, 14, 11, 8),
+	NS2_PIN_DESC(74, "spi0_sck", 2, 0x08, 7, 6, 3, 0),
+	NS2_PIN_DESC(75, "spi1_fss", 2, 0x0c, 31, 30, 27, 24),
+	NS2_PIN_DESC(76, "spi1_rxd", 2, 0x0c, 23, 22, 19, 16),
+	NS2_PIN_DESC(77, "spi1_txd", 2, 0x0c, 15, 14, 11, 8),
+	NS2_PIN_DESC(78, "spi1_sck", 2, 0x0c, 7, 6, 3, 0),
+	NS2_PIN_DESC(79, "sdio0_data7", 2, 0x10, 31, 30, 27, 24),
+	NS2_PIN_DESC(80, "sdio0_emmc_rst", 2, 0x10, 23, 22, 19, 16),
+	NS2_PIN_DESC(81, "sdio0_led_on", 2, 0x10, 15, 14, 11, 8),
+	NS2_PIN_DESC(82, "sdio0_wp", 2, 0x10, 7, 6, 3, 0),
+	NS2_PIN_DESC(83, "sdio0_data3", 2, 0x14, 31, 30, 27, 24),
+	NS2_PIN_DESC(84, "sdio0_data4", 2, 0x14, 23, 22, 19, 16),
+	NS2_PIN_DESC(85, "sdio0_data5", 2, 0x14, 15, 14, 11, 8),
+	NS2_PIN_DESC(86, "sdio0_data6", 2, 0x14, 7, 6, 3, 0),
+	NS2_PIN_DESC(87, "sdio0_cmd", 2, 0x18, 31, 30, 27, 24),
+	NS2_PIN_DESC(88, "sdio0_data0", 2, 0x18, 23, 22, 19, 16),
+	NS2_PIN_DESC(89, "sdio0_data1", 2, 0x18, 15, 14, 11, 8),
+	NS2_PIN_DESC(90, "sdio0_data2", 2, 0x18, 7, 6, 3, 0),
+	NS2_PIN_DESC(91, "sdio1_led_on", 2, 0x1c, 31, 30, 27, 24),
+	NS2_PIN_DESC(92, "sdio1_wp", 2, 0x1c, 23, 22, 19, 16),
+	NS2_PIN_DESC(93, "sdio0_cd_l", 2, 0x1c, 15, 14, 11, 8),
+	NS2_PIN_DESC(94, "sdio0_clk", 2, 0x1c, 7, 6, 3, 0),
+	NS2_PIN_DESC(95, "sdio1_data5", 2, 0x20, 31, 30, 27, 24),
+	NS2_PIN_DESC(96, "sdio1_data6", 2, 0x20, 23, 22, 19, 16),
+	NS2_PIN_DESC(97, "sdio1_data7", 2, 0x20, 15, 14, 11, 8),
+	NS2_PIN_DESC(98, "sdio1_emmc_rst", 2, 0x20, 7, 6, 3, 0),
+	NS2_PIN_DESC(99, "sdio1_data1", 2, 0x24, 31, 30, 27, 24),
+	NS2_PIN_DESC(100, "sdio1_data2", 2, 0x24, 23, 22, 19, 16),
+	NS2_PIN_DESC(101, "sdio1_data3", 2, 0x24, 15, 14, 11, 8),
+	NS2_PIN_DESC(102, "sdio1_data4", 2, 0x24, 7, 6, 3, 0),
+	NS2_PIN_DESC(103, "sdio1_cd_l", 2, 0x28, 31, 30, 27, 24),
+	NS2_PIN_DESC(104, "sdio1_clk", 2, 0x28, 23, 22, 19, 16),
+	NS2_PIN_DESC(105, "sdio1_cmd", 2, 0x28, 15, 14, 11, 8),
+	NS2_PIN_DESC(106, "sdio1_data0", 2, 0x28, 7, 6, 3, 0),
+	NS2_PIN_DESC(107, "ext_mdio_0", 2, 0x2c, 15, 14, 11, 8),
+	NS2_PIN_DESC(108, "ext_mdc_0", 2, 0x2c, 7, 6, 3, 0),
+	NS2_PIN_DESC(109, "usb3_p1_vbus_ppc", 2, 0x34, 31, 30, 27, 24),
+	NS2_PIN_DESC(110, "usb3_p1_overcurrent", 2, 0x34, 23, 22, 19, 16),
+	NS2_PIN_DESC(111, "usb3_p0_vbus_ppc", 2, 0x34, 15, 14, 11, 8),
+	NS2_PIN_DESC(112, "usb3_p0_overcurrent", 2, 0x34, 7, 6, 3, 0),
+	NS2_PIN_DESC(113, "usb2_presence_indication", 2, 0x38, 31, 30, 27, 24),
+	NS2_PIN_DESC(114, "usb2_vbus_present", 2, 0x38, 23, 22, 19, 16),
+	NS2_PIN_DESC(115, "usb2_vbus_ppc", 2, 0x38, 15, 14, 11, 8),
+	NS2_PIN_DESC(116, "usb2_overcurrent", 2, 0x38, 7, 6, 3, 0),
+	NS2_PIN_DESC(117, "sata_led1", 2, 0x3c, 15, 14, 11, 8),
+	NS2_PIN_DESC(118, "sata_led0", 2, 0x3c, 7, 6, 3, 0),
+};
+
+/* List of groups of pins: each array holds the ns2_pins[] pin numbers of one
+ * group. Groups with identical pin lists differ, per ns2_pin_groups[], only
+ * in the alternate function ("alt") selected for those pins. */
+
+static const unsigned int nand_pins[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+	11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
+static const unsigned int nor_data_pins[] =  {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+	10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25};
+
+static const unsigned int gpio_0_1_pins[] = {24, 25};
+static const unsigned int pwm_0_pins[] = {24};
+static const unsigned int pwm_1_pins[] = {25};
+
+static const unsigned int uart1_ext_clk_pins[] = {26};
+static const unsigned int nor_adv_pins[] = {26};
+
+static const unsigned int gpio_2_5_pins[] = {27, 28, 29, 30};
+static const unsigned int pcie_ab1_clk_wak_pins[] = {27, 28, 29, 30};
+static const unsigned int nor_addr_0_3_pins[] = {27, 28, 29, 30};
+static const unsigned int pwm_2_pins[] = {27};
+static const unsigned int pwm_3_pins[] = {28};
+
+static const unsigned int gpio_6_7_pins[] = {31, 32};
+static const unsigned int pcie_a3_clk_wak_pins[] = {31, 32};
+static const unsigned int nor_addr_4_5_pins[] = {31, 32};
+
+static const unsigned int gpio_8_9_pins[] = {33, 34};
+static const unsigned int pcie_b3_clk_wak_pins[] = {33, 34};
+static const unsigned int nor_addr_6_7_pins[] = {33, 34};
+
+static const unsigned int gpio_10_11_pins[] = {35, 36};
+static const unsigned int pcie_b2_clk_wak_pins[] = {35, 36};
+static const unsigned int nor_addr_8_9_pins[] = {35, 36};
+
+static const unsigned int gpio_12_13_pins[] = {37, 38};
+static const unsigned int pcie_a2_clk_wak_pins[] = {37, 38};
+static const unsigned int nor_addr_10_11_pins[] = {37, 38};
+
+static const unsigned int gpio_14_17_pins[] = {39, 40, 41, 42};
+static const unsigned int uart0_modem_pins[] = {39, 40, 41, 42};
+static const unsigned int nor_addr_12_15_pins[] = {39, 40, 41, 42};
+
+static const unsigned int gpio_18_19_pins[] = {43, 44};
+static const unsigned int uart0_rts_cts_pins[] = {43, 44};
+
+static const unsigned int gpio_20_21_pins[] = {45, 46};
+static const unsigned int uart0_in_out_pins[] = {45, 46};
+
+static const unsigned int gpio_22_23_pins[] = {47, 48};
+static const unsigned int uart1_dcd_dsr_pins[] = {47, 48};
+
+static const unsigned int gpio_24_25_pins[] = {49, 50};
+static const unsigned int uart1_ri_dtr_pins[] = {49, 50};
+
+static const unsigned int gpio_26_27_pins[] = {51, 52};
+static const unsigned int uart1_rts_cts_pins[] = {51, 52};
+
+static const unsigned int gpio_28_29_pins[] = {53, 54};
+static const unsigned int uart1_in_out_pins[] = {53, 54};
+
+static const unsigned int gpio_30_31_pins[] = {55, 56};
+static const unsigned int uart2_rts_cts_pins[] = {55, 56};
+
+#define NS2_PIN_GROUP(grp, mux_base, mux_off, mux_shift, mux_mask, mux_alt) \
+{	/* initializer for one struct ns2_pin_group */			\
+	.name = __stringify(grp) "_grp",				\
+	.pins = grp ## _pins,						\
+	.num_pins = ARRAY_SIZE(grp ## _pins),				\
+	.mux = {							\
+		.base = mux_base,					\
+		.offset = mux_off,					\
+		.shift = mux_shift,					\
+		.mask = mux_mask,					\
+		.alt = mux_alt,						\
+	}								\
+}
+
+/* List of Northstar2 pin groups, one NS2_PIN_GROUP(group_name, base, offset,
+ * shift, mask, alt) per group; the macro derives the "<group_name>_grp" name
+ * and the <group_name>_pins array reference from its first argument. */
+static const struct ns2_pin_group ns2_pin_groups[] = {
+	NS2_PIN_GROUP(nand, 0, 0, 31, 1, 0),
+	NS2_PIN_GROUP(nor_data, 0, 0, 31, 1, 1),
+	NS2_PIN_GROUP(gpio_0_1, 0, 0, 31, 1, 0),
+
+	NS2_PIN_GROUP(uart1_ext_clk, 0, 4, 30, 3, 1),
+	NS2_PIN_GROUP(nor_adv, 0, 4, 30, 3, 2),
+
+	NS2_PIN_GROUP(gpio_2_5,	0, 4, 28, 3, 0),
+	NS2_PIN_GROUP(pcie_ab1_clk_wak, 0, 4, 28, 3, 1),
+	NS2_PIN_GROUP(nor_addr_0_3, 0, 4, 28, 3, 2),
+
+	NS2_PIN_GROUP(gpio_6_7, 0, 4, 26, 3, 0),
+	NS2_PIN_GROUP(pcie_a3_clk_wak, 0, 4, 26, 3, 1),
+	NS2_PIN_GROUP(nor_addr_4_5, 0, 4, 26, 3, 2),
+
+	NS2_PIN_GROUP(gpio_8_9, 0, 4, 24, 3, 0),
+	NS2_PIN_GROUP(pcie_b3_clk_wak, 0, 4, 24, 3, 1),
+	NS2_PIN_GROUP(nor_addr_6_7, 0, 4, 24, 3, 2),
+
+	NS2_PIN_GROUP(gpio_10_11, 0, 4, 22, 3, 0),
+	NS2_PIN_GROUP(pcie_b2_clk_wak, 0, 4, 22, 3, 1),
+	NS2_PIN_GROUP(nor_addr_8_9, 0, 4, 22, 3, 2),
+
+	NS2_PIN_GROUP(gpio_12_13, 0, 4, 20, 3, 0),
+	NS2_PIN_GROUP(pcie_a2_clk_wak, 0, 4, 20, 3, 1),
+	NS2_PIN_GROUP(nor_addr_10_11, 0, 4, 20, 3, 2),
+
+	NS2_PIN_GROUP(gpio_14_17, 0, 4, 18, 3, 0),
+	NS2_PIN_GROUP(uart0_modem, 0, 4, 18, 3, 1),
+	NS2_PIN_GROUP(nor_addr_12_15, 0, 4, 18, 3, 2),
+
+	NS2_PIN_GROUP(gpio_18_19, 0, 4, 16, 3, 0),
+	NS2_PIN_GROUP(uart0_rts_cts, 0, 4, 16, 3, 1),
+
+	NS2_PIN_GROUP(gpio_20_21, 0, 4, 14, 3, 0),
+	NS2_PIN_GROUP(uart0_in_out, 0, 4, 14, 3, 1),
+
+	NS2_PIN_GROUP(gpio_22_23, 0, 4, 12, 3, 0),
+	NS2_PIN_GROUP(uart1_dcd_dsr, 0, 4, 12, 3, 1),
+
+	NS2_PIN_GROUP(gpio_24_25, 0, 4, 10, 3, 0),
+	NS2_PIN_GROUP(uart1_ri_dtr, 0, 4, 10, 3, 1),
+
+	NS2_PIN_GROUP(gpio_26_27, 0, 4, 8, 3, 0),
+	NS2_PIN_GROUP(uart1_rts_cts, 0, 4, 8, 3, 1),
+
+	NS2_PIN_GROUP(gpio_28_29, 0, 4, 6, 3, 0),
+	NS2_PIN_GROUP(uart1_in_out, 0, 4, 6, 3, 1),
+
+	NS2_PIN_GROUP(gpio_30_31, 0, 4, 4, 3, 0),
+	NS2_PIN_GROUP(uart2_rts_cts, 0, 4, 4, 3, 1),
+
+	NS2_PIN_GROUP(pwm_0, 1, 0, 0, 1, 1),
+	NS2_PIN_GROUP(pwm_1, 1, 0, 1, 1, 1),
+	NS2_PIN_GROUP(pwm_2, 1, 0, 2, 1, 1),
+	NS2_PIN_GROUP(pwm_3, 1, 0, 3, 1, 1),
+};
+
+/*
+ * List of groups supported by functions
+ *
+ * One array per function, named <function>_grps so that NS2_PIN_FUNCTION()
+ * can find it by token pasting. Every string must match a group name
+ * produced by NS2_PIN_GROUP(), i.e. the group identifier plus "_grp".
+ */
+
+static const char * const nand_grps[] = {"nand_grp"};
+
+static const char * const nor_grps[] = {"nor_data_grp", "nor_adv_grp",
+	"nor_addr_0_3_grp", "nor_addr_4_5_grp",	"nor_addr_6_7_grp",
+	"nor_addr_8_9_grp", "nor_addr_10_11_grp", "nor_addr_12_15_grp"};
+
+static const char * const gpio_grps[] = {"gpio_0_1_grp", "gpio_2_5_grp",
+	"gpio_6_7_grp",	"gpio_8_9_grp",	"gpio_10_11_grp", "gpio_12_13_grp",
+	"gpio_14_17_grp", "gpio_18_19_grp", "gpio_20_21_grp", "gpio_22_23_grp",
+	"gpio_24_25_grp", "gpio_26_27_grp", "gpio_28_29_grp",
+	"gpio_30_31_grp"};
+
+static const char * const pcie_grps[] = {"pcie_ab1_clk_wak_grp",
+	"pcie_a3_clk_wak_grp", "pcie_b3_clk_wak_grp", "pcie_b2_clk_wak_grp",
+	"pcie_a2_clk_wak_grp"};
+
+static const char * const uart0_grps[] = {"uart0_modem_grp",
+	"uart0_rts_cts_grp", "uart0_in_out_grp"};
+
+static const char * const uart1_grps[] = {"uart1_ext_clk_grp",
+	"uart1_dcd_dsr_grp", "uart1_ri_dtr_grp", "uart1_rts_cts_grp",
+	"uart1_in_out_grp"};
+
+static const char * const uart2_grps[] = {"uart2_rts_cts_grp"};
+
+static const char * const pwm_grps[] = {"pwm_0_grp", "pwm_1_grp",
+	"pwm_2_grp", "pwm_3_grp"};
+
+/*
+ * Build one struct ns2_pin_function initializer: the function name is the
+ * stringified @fn and its group list is the array called <fn>_grps.
+ */
+#define NS2_PIN_FUNCTION(fn)				\
+{							\
+	.name = #fn,					\
+	.groups = fn ## _grps,				\
+	.num_groups = ARRAY_SIZE(fn ## _grps),		\
+}
+
+/*
+ * List of supported functions
+ *
+ * Each entry expands through NS2_PIN_FUNCTION() and therefore requires a
+ * matching <function>_grps array to be defined above.
+ */
+static const struct ns2_pin_function ns2_pin_functions[] = {
+	NS2_PIN_FUNCTION(nand),
+	NS2_PIN_FUNCTION(nor),
+	NS2_PIN_FUNCTION(gpio),
+	NS2_PIN_FUNCTION(pcie),
+	NS2_PIN_FUNCTION(uart0),
+	NS2_PIN_FUNCTION(uart1),
+	NS2_PIN_FUNCTION(uart2),
+	NS2_PIN_FUNCTION(pwm),
+};
+
+/* Return the number of pin groups stored in the controller's driver data. */
+static int ns2_get_groups_count(struct pinctrl_dev *pctrl_dev)
+{
+	const struct ns2_pinctrl *ns2 = pinctrl_dev_get_drvdata(pctrl_dev);
+
+	return ns2->num_groups;
+}
+
+/* Return the name of the pin group at index @selector (not range-checked). */
+static const char *ns2_get_group_name(struct pinctrl_dev *pctrl_dev,
+				      unsigned int selector)
+{
+	const struct ns2_pinctrl *ns2 = pinctrl_dev_get_drvdata(pctrl_dev);
+	const struct ns2_pin_group *grp = &ns2->groups[selector];
+
+	return grp->name;
+}
+
+/*
+ * Hand back the pin array and pin count of the group at index @selector
+ * through @pins and @num_pins. Always returns 0; @selector is not
+ * range-checked here.
+ */
+static int ns2_get_group_pins(struct pinctrl_dev *pctrl_dev,
+			      unsigned int selector, const unsigned int **pins,
+			      unsigned int *num_pins)
+{
+	const struct ns2_pinctrl *ns2 = pinctrl_dev_get_drvdata(pctrl_dev);
+	const struct ns2_pin_group *grp = &ns2->groups[selector];
+
+	*num_pins = grp->num_pins;
+	*pins = grp->pins;
+
+	return 0;
+}
+
+/*
+ * .pin_dbg_show callback: append the controller's device name to @s.
+ * @offset is not used.
+ */
+static void ns2_pin_dbg_show(struct pinctrl_dev *pctrl_dev,
+			     struct seq_file *s, unsigned int offset)
+{
+	const char *devname = dev_name(pctrl_dev->dev);
+
+	seq_printf(s, " %s", devname);
+}
+
+/*
+ * Group enumeration and device-tree mapping callbacks. The table is never
+ * modified at run time, so it is const like ns2_pinconf_ops.
+ */
+static const struct pinctrl_ops ns2_pinctrl_ops = {
+	.get_groups_count = ns2_get_groups_count,
+	.get_group_name = ns2_get_group_name,
+	.get_group_pins = ns2_get_group_pins,
+	.pin_dbg_show = ns2_pin_dbg_show,
+	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
+	.dt_free_map = pinctrl_utils_free_map,
+};
+
+/* Return the number of mux functions stored in the controller's driver data. */
+static int ns2_get_functions_count(struct pinctrl_dev *pctrl_dev)
+{
+	const struct ns2_pinctrl *ns2 = pinctrl_dev_get_drvdata(pctrl_dev);
+
+	return ns2->num_functions;
+}
+
+/* Return the name of the function at index @selector (not range-checked). */
+static const char *ns2_get_function_name(struct pinctrl_dev *pctrl_dev,
+					 unsigned int selector)
+{
+	const struct ns2_pinctrl *ns2 = pinctrl_dev_get_drvdata(pctrl_dev);
+	const struct ns2_pin_function *fn = &ns2->functions[selector];
+
+	return fn->name;
+}
+
+/*
+ * Hand back the group-name list and its length for the function at index
+ * @selector through @groups and @num_groups. Always returns 0; @selector
+ * is not range-checked here.
+ */
+static int ns2_get_function_groups(struct pinctrl_dev *pctrl_dev,
+				   unsigned int selector,
+				   const char * const **groups,
+				   unsigned int * const num_groups)
+{
+	const struct ns2_pinctrl *ns2 = pinctrl_dev_get_drvdata(pctrl_dev);
+	const struct ns2_pin_function *fn = &ns2->functions[selector];
+
+	*num_groups = fn->num_groups;
+	*groups = fn->groups;
+
+	return 0;
+}
+
+/*
+ * ns2_pinmux_set() - program the mux field belonging to one pin group
+ * @pinctrl: driver state (register bases, lock, device for logging)
+ * @func: function being selected; used only in the error message
+ * @grp: group whose mux descriptor (base/offset/shift/mask/alt) is applied
+ * @mux_log: per-field log of NS2_NUM_IOMUX entries recording what has
+ *	     already been configured
+ *
+ * The log entry matching the group's base/offset/shift is looked up first.
+ * If that field was already configured, the request succeeds only when it
+ * asks for the same alt value; otherwise the register field is rewritten
+ * under the controller spinlock and the log entry is updated.
+ *
+ * Returns 0 on success, -EINVAL if no log entry matches, if the field is
+ * already configured for a different alt value, or if the base selector is
+ * unknown.
+ */
+static int ns2_pinmux_set(struct ns2_pinctrl *pinctrl,
+			  const struct ns2_pin_function *func,
+			  const struct ns2_pin_group *grp,
+			  struct ns2_mux_log *mux_log)
+{
+	const struct ns2_mux *mux = &grp->mux;
+	void __iomem *base_address;
+	unsigned long flags;
+	u32 val, mask;
+	int i;
+
+	for (i = 0; i < NS2_NUM_IOMUX; i++) {
+		if ((mux->shift != mux_log[i].mux.shift) ||
+			(mux->base != mux_log[i].mux.base) ||
+			(mux->offset != mux_log[i].mux.offset))
+			continue;
+
+		/* if this is a new configuration, just do it! */
+		if (!mux_log[i].is_configured)
+			break;
+
+		/*
+		 * IOMUX has been configured previously and one is trying to
+		 * configure it to a different function
+		 */
+		if (mux_log[i].mux.alt != mux->alt) {
+			dev_err(pinctrl->dev,
+				"double configuration error detected!\n");
+			dev_err(pinctrl->dev, "func:%s grp:%s\n",
+				func->name, grp->name);
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+	if (i == NS2_NUM_IOMUX)
+		return -EINVAL;
+
+	/*
+	 * Resolve the register block before touching the log, so that an
+	 * unknown base selector cannot leave the entry marked as configured
+	 * without the hardware having been written.
+	 */
+	switch (mux->base) {
+	case NS2_PIN_MUX_BASE0:
+		base_address = pinctrl->base0;
+		break;
+
+	case NS2_PIN_MUX_BASE1:
+		base_address = pinctrl->base1;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	mask = mux->mask;
+	mux_log[i].mux.alt = mux->alt;
+	mux_log[i].is_configured = true;
+
+	/* read-modify-write of the shared mux register */
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	val = readl(base_address + mux->offset);
+	val &= ~(mask << mux->shift);
+	val |= mux->alt << mux->shift;
+	writel(val, (base_address + mux->offset));
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	return 0;
+}
+
+/*
+ * ns2_pinmux_enable() - .set_mux callback
+ * @pctrl_dev: pin controller device
+ * @func_select: index into the controller's function table
+ * @grp_select: index into the controller's group table
+ *
+ * Validates both selectors, then applies the group's mux setting via
+ * ns2_pinmux_set(). Valid indices run from 0 to count - 1, so an index
+ * equal to the count is rejected as well.
+ *
+ * Returns 0 on success or -EINVAL for an out-of-range selector or a
+ * failure reported by ns2_pinmux_set().
+ */
+static int ns2_pinmux_enable(struct pinctrl_dev *pctrl_dev,
+			     unsigned int func_select, unsigned int grp_select)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrl_dev);
+	const struct ns2_pin_function *func;
+	const struct ns2_pin_group *grp;
+
+	if (grp_select >= pinctrl->num_groups ||
+		func_select >= pinctrl->num_functions)
+		return -EINVAL;
+
+	func = &pinctrl->functions[func_select];
+	grp = &pinctrl->groups[grp_select];
+
+	dev_dbg(pctrl_dev->dev, "func:%u name:%s grp:%u name:%s\n",
+		func_select, func->name, grp_select, grp->name);
+
+	dev_dbg(pctrl_dev->dev, "offset:0x%08x shift:%u alt:%u\n",
+		grp->mux.offset, grp->mux.shift, grp->mux.alt);
+
+	return ns2_pinmux_set(pinctrl, func, grp, pinctrl->mux_log);
+}
+
+/*
+ * ns2_pin_set_enable() - update the input-enable field of a pin
+ * @pctrldev: pin controller device
+ * @pin: pin index into the descriptor's pin table
+ * @enable: non-zero leaves the field cleared, zero sets it
+ *
+ * The field is cleared with NS2_PIN_INPUT_EN_MASK, the same mask that is
+ * used to set it below and to read it in ns2_pin_get_enable(), rather than
+ * with the unrelated slew-rate mask.
+ *
+ * Always returns 0.
+ */
+static int ns2_pin_set_enable(struct pinctrl_dev *pctrldev, unsigned int pin,
+			      u16 enable)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	unsigned long flags;
+	u32 val;
+	void __iomem *base_address;
+
+	base_address = pinctrl->pinconf_base;
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	val = readl(base_address + pin_data->pin_conf.offset);
+	val &= ~(NS2_PIN_INPUT_EN_MASK << pin_data->pin_conf.input_en);
+
+	/* the register field is set when the input is to be disabled */
+	if (!enable)
+		val |= NS2_PIN_INPUT_EN_MASK << pin_data->pin_conf.input_en;
+
+	writel(val, (base_address + pin_data->pin_conf.offset));
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	dev_dbg(pctrldev->dev, "pin:%u set enable:%d\n", pin, enable);
+	return 0;
+}
+
+/*
+ * ns2_pin_get_enable() - read back the input-enable state of a pin
+ *
+ * Returns NS2_PIN_INPUT_EN_MASK when the register field reads as zero and
+ * 0 when it is set, i.e. the inverse of the raw field value.
+ */
+static int ns2_pin_get_enable(struct pinctrl_dev *pctrldev, unsigned int pin)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	unsigned long flags;
+	int field, enable;
+
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	field = readl(pinctrl->pinconf_base + pin_data->pin_conf.offset);
+	field = (field >> pin_data->pin_conf.input_en) &
+			NS2_PIN_INPUT_EN_MASK;
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	/* invert: a cleared field is reported as the mask value */
+	enable = field ? 0 : NS2_PIN_INPUT_EN_MASK;
+
+	dev_dbg(pctrldev->dev, "pin:%u get disable:%d\n", pin, enable);
+	return enable;
+}
+
+/*
+ * ns2_pin_set_slew() - set or clear the slew-rate field of a pin
+ *
+ * Any non-zero @slew sets the field, zero clears it. Always returns 0.
+ */
+static int ns2_pin_set_slew(struct pinctrl_dev *pctrldev, unsigned int pin,
+			    u16 slew)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	void __iomem *reg = pinctrl->pinconf_base + pin_data->pin_conf.offset;
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	val = readl(reg);
+	val &= ~(NS2_PIN_SRC_MASK << pin_data->pin_conf.src_shift);
+	if (slew)
+		val |= NS2_PIN_SRC_MASK << pin_data->pin_conf.src_shift;
+	writel(val, reg);
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	dev_dbg(pctrldev->dev, "pin:%u set slew:%d\n", pin, slew);
+	return 0;
+}
+
+/*
+ * ns2_pin_get_slew() - read the slew-rate field of a pin into @slew
+ *
+ * Always returns 0.
+ */
+static int ns2_pin_get_slew(struct pinctrl_dev *pctrldev, unsigned int pin,
+			    u16 *slew)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	void __iomem *reg = pinctrl->pinconf_base + pin_data->pin_conf.offset;
+	unsigned long flags;
+	u32 raw;
+
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	raw = readl(reg);
+	*slew = (raw >> pin_data->pin_conf.src_shift) & NS2_PIN_SRC_MASK;
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	dev_dbg(pctrldev->dev, "pin:%u get slew:%d\n", pin, *slew);
+	return 0;
+}
+
+/*
+ * ns2_pin_set_pull() - program the pull field of a pin
+ *
+ * The pull field is cleared first; NS2_PIN_PULL_UP and/or NS2_PIN_PULL_DOWN
+ * are then OR-ed in according to @pull_up and @pull_down. Passing both as
+ * false therefore disables the bias. Always returns 0.
+ */
+static int ns2_pin_set_pull(struct pinctrl_dev *pctrldev, unsigned int pin,
+			    bool pull_up, bool pull_down)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	void __iomem *reg = pinctrl->pinconf_base + pin_data->pin_conf.offset;
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	val = readl(reg);
+	val &= ~(NS2_PIN_PULL_MASK << pin_data->pin_conf.pull_shift);
+	if (pull_up)
+		val |= NS2_PIN_PULL_UP << pin_data->pin_conf.pull_shift;
+	if (pull_down)
+		val |= NS2_PIN_PULL_DOWN << pin_data->pin_conf.pull_shift;
+	writel(val, reg);
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	dev_dbg(pctrldev->dev, "pin:%u set pullup:%d pulldown: %d\n",
+		pin, pull_up, pull_down);
+	return 0;
+}
+
+/*
+ * ns2_pin_get_pull() - report the pull configuration of a pin
+ *
+ * @pull_up is set to true only when the pull field equals NS2_PIN_PULL_UP,
+ * @pull_down only when it equals NS2_PIN_PULL_DOWN; any other field value
+ * leaves both false.
+ */
+static void ns2_pin_get_pull(struct pinctrl_dev *pctrldev,
+			     unsigned int pin, bool *pull_up,
+			     bool *pull_down)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	unsigned long flags;
+	u32 pull;
+
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	pull = readl(pinctrl->pinconf_base + pin_data->pin_conf.offset);
+	pull >>= pin_data->pin_conf.pull_shift;
+	pull &= NS2_PIN_PULL_MASK;
+
+	*pull_up = (pull == NS2_PIN_PULL_UP);
+	*pull_down = (pull == NS2_PIN_PULL_DOWN);
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+}
+
+/*
+ * ns2_pin_set_strength() - program the drive-strength field of a pin
+ * @strength: requested drive strength in mA
+ *
+ * Only even values from 2 to 16 are accepted; the value written to the
+ * field is (strength / 2) - 1.
+ *
+ * Returns 0 on success or -ENOTSUPP for an unsupported strength.
+ */
+static int ns2_pin_set_strength(struct pinctrl_dev *pctrldev, unsigned int pin,
+				u16 strength)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	void __iomem *reg;
+	unsigned long flags;
+	u32 val;
+
+	/* reject odd values and anything outside 2..16 */
+	if ((strength % 2) || strength < 2 || strength > 16)
+		return -ENOTSUPP;
+
+	reg = pinctrl->pinconf_base + pin_data->pin_conf.offset;
+
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	val = readl(reg);
+	val &= ~(NS2_PIN_DRIVE_STRENGTH_MASK << pin_data->pin_conf.drive_shift);
+	val |= ((strength / 2) - 1) << pin_data->pin_conf.drive_shift;
+	writel(val, reg);
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	dev_dbg(pctrldev->dev, "pin:%u set drive strength:%d mA\n",
+		pin, strength);
+	return 0;
+}
+
+/*
+ * ns2_pin_get_strength() - read the drive strength of a pin into @strength
+ *
+ * Inverse of the encoding used by ns2_pin_set_strength(): the reported
+ * value is (field + 1) * 2. Always returns 0.
+ */
+static int ns2_pin_get_strength(struct pinctrl_dev *pctrldev, unsigned int pin,
+				 u16 *strength)
+{
+	struct ns2_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctrldev);
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	unsigned long flags;
+	u16 field;
+	u32 raw;
+
+	spin_lock_irqsave(&pinctrl->lock, flags);
+	raw = readl(pinctrl->pinconf_base + pin_data->pin_conf.offset);
+	field = (raw >> pin_data->pin_conf.drive_shift) &
+					NS2_PIN_DRIVE_STRENGTH_MASK;
+	*strength = (field + 1) * 2;
+	spin_unlock_irqrestore(&pinctrl->lock, flags);
+
+	dev_dbg(pctrldev->dev, "pin:%u get drive strength:%d mA\n",
+		pin, *strength);
+	return 0;
+}
+
+/*
+ * ns2_pin_config_get() - .pin_config_get callback
+ * @pctldev: pin controller device
+ * @pin: pin index into the descriptor's pin table
+ * @config: in: packed config whose parameter is queried;
+ *	    out: repacked with the current argument for drive strength and
+ *	    slew rate (left untouched for the other parameters)
+ *
+ * For the bias and input-enable parameters the return value alone says
+ * whether the queried setting is currently active (0) or not (-EINVAL).
+ *
+ * Returns -ENOTSUPP if the pin has no config register (pin_conf.base is -1)
+ * or the parameter is not handled.
+ */
+static int ns2_pin_config_get(struct pinctrl_dev *pctldev, unsigned int pin,
+			      unsigned long *config)
+{
+	struct ns2_pin *pin_data = pctldev->desc->pins[pin].drv_data;
+	enum pin_config_param param = pinconf_to_config_param(*config);
+	bool pull_up, pull_down, active;
+	u16 arg = 0;
+	int ret;
+
+	if (pin_data->pin_conf.base == -1)
+		return -ENOTSUPP;
+
+	switch (param) {
+	case PIN_CONFIG_BIAS_DISABLE:
+	case PIN_CONFIG_BIAS_PULL_UP:
+	case PIN_CONFIG_BIAS_PULL_DOWN:
+		ns2_pin_get_pull(pctldev, pin, &pull_up, &pull_down);
+		if (param == PIN_CONFIG_BIAS_DISABLE)
+			active = !pull_up && !pull_down;
+		else if (param == PIN_CONFIG_BIAS_PULL_UP)
+			active = pull_up;
+		else
+			active = pull_down;
+		return active ? 0 : -EINVAL;
+
+	case PIN_CONFIG_DRIVE_STRENGTH:
+		ret = ns2_pin_get_strength(pctldev, pin, &arg);
+		break;
+
+	case PIN_CONFIG_SLEW_RATE:
+		ret = ns2_pin_get_slew(pctldev, pin, &arg);
+		break;
+
+	case PIN_CONFIG_INPUT_ENABLE:
+		return ns2_pin_get_enable(pctldev, pin) ? 0 : -EINVAL;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	/* only the parameters that carry an argument reach this point */
+	if (ret)
+		return ret;
+
+	*config = pinconf_to_config_packed(param, arg);
+	return 0;
+}
+
+/*
+ * ns2_pin_config_set() - .pin_config_set callback
+ * @pctrldev: pin controller device
+ * @pin: pin index into the descriptor's pin table
+ * @configs: array of packed configs to apply in order
+ * @num_configs: number of entries in @configs
+ *
+ * Processing stops at the first entry that fails or is not handled;
+ * entries applied before that point are not rolled back.
+ *
+ * Returns the result of the last applied setting, or -ENOTSUPP if the pin
+ * has no config register (pin_conf.base is -1), a parameter is not
+ * handled, or @num_configs is zero.
+ */
+static int ns2_pin_config_set(struct pinctrl_dev *pctrldev, unsigned int pin,
+			      unsigned long *configs, unsigned int num_configs)
+{
+	struct ns2_pin *pin_data = pctrldev->desc->pins[pin].drv_data;
+	enum pin_config_param param;
+	unsigned int idx;
+	u16 arg;
+	int ret = -ENOTSUPP;
+
+	if (pin_data->pin_conf.base == -1)
+		return -ENOTSUPP;
+
+	for (idx = 0; idx < num_configs; idx++) {
+		param = pinconf_to_config_param(configs[idx]);
+		arg = pinconf_to_config_argument(configs[idx]);
+
+		switch (param) {
+		case PIN_CONFIG_BIAS_DISABLE:
+			ret = ns2_pin_set_pull(pctrldev, pin, false, false);
+			break;
+
+		case PIN_CONFIG_BIAS_PULL_UP:
+			ret = ns2_pin_set_pull(pctrldev, pin, true, false);
+			break;
+
+		case PIN_CONFIG_BIAS_PULL_DOWN:
+			ret = ns2_pin_set_pull(pctrldev, pin, false, true);
+			break;
+
+		case PIN_CONFIG_DRIVE_STRENGTH:
+			ret = ns2_pin_set_strength(pctrldev, pin, arg);
+			break;
+
+		case PIN_CONFIG_SLEW_RATE:
+			ret = ns2_pin_set_slew(pctrldev, pin, arg);
+			break;
+
+		case PIN_CONFIG_INPUT_ENABLE:
+			ret = ns2_pin_set_enable(pctrldev, pin, arg);
+			break;
+
+		default:
+			dev_err(pctrldev->dev, "invalid configuration\n");
+			return -ENOTSUPP;
+		}
+
+		/* stop at the first setting that could not be applied */
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Function enumeration and mux selection callbacks. The table is never
+ * modified at run time, so it is const like ns2_pinconf_ops.
+ */
+static const struct pinmux_ops ns2_pinmux_ops = {
+	.get_functions_count = ns2_get_functions_count,
+	.get_function_name = ns2_get_function_name,
+	.get_function_groups = ns2_get_function_groups,
+	.set_mux = ns2_pinmux_enable,
+};
+
+/* Per-pin configuration callbacks, flagged as using generic pinconf. */
+static const struct pinconf_ops ns2_pinconf_ops = {
+	.pin_config_get = ns2_pin_config_get,
+	.pin_config_set = ns2_pin_config_set,
+	.is_generic = true,
+};
+
+/*
+ * Controller descriptor. It is not const because ns2_pinmux_probe() fills
+ * in .pins and .npins before registering it.
+ */
+static struct pinctrl_desc ns2_pinctrl_desc = {
+	.name = "ns2-pinmux",
+	.confops = &ns2_pinconf_ops,
+	.pmxops = &ns2_pinmux_ops,
+	.pctlops = &ns2_pinctrl_ops,
+};
+
+/*
+ * ns2_mux_log_init() - allocate and pre-populate the IOMUX log
+ *
+ * Allocates NS2_NUM_IOMUX device-managed entries and records, for each one,
+ * which register field (base/offset/shift) it tracks. Every entry starts
+ * unconfigured with alt 0.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int ns2_mux_log_init(struct ns2_pinctrl *pinctrl)
+{
+	const unsigned int first_pwm = NS2_NUM_IOMUX - NS2_NUM_PWM_MUX;
+	struct ns2_mux_log *entry;
+	unsigned int idx;
+
+	pinctrl->mux_log = devm_kcalloc(pinctrl->dev, NS2_NUM_IOMUX,
+					sizeof(*pinctrl->mux_log),
+					GFP_KERNEL);
+	if (!pinctrl->mux_log)
+		return -ENOMEM;
+
+	for (idx = 0; idx < NS2_NUM_IOMUX; idx++) {
+		entry = &pinctrl->mux_log[idx];
+		entry->is_configured = false;
+		entry->mux.alt = 0;
+
+		if (idx >= first_pwm) {
+			/*
+			 * The last NS2_NUM_PWM_MUX groups use one bit each
+			 * in the AUX_SEL register.
+			 */
+			entry->mux.base = NS2_PIN_MUX_BASE1;
+			entry->mux.offset = 0;
+			entry->mux.shift = idx - first_pwm;
+		} else if (idx == 0) {
+			/*
+			 * Group 0 uses bit 31 in the IOMUX_PAD_FUNCTION_0
+			 * register.
+			 */
+			entry->mux.base = NS2_PIN_MUX_BASE0;
+			entry->mux.offset = 0;
+			entry->mux.shift = 31;
+		} else {
+			/*
+			 * The groups in between use two bits each in the
+			 * IOMUX_PAD_FUNCTION_1 register, starting with bit
+			 * position 30 and moving downwards.
+			 */
+			entry->mux.base = NS2_PIN_MUX_BASE0;
+			entry->mux.offset = NS2_MUX_PAD_FUNC1_OFFSET;
+			entry->mux.shift = 32 - (idx * 2);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * ns2_pinmux_probe() - set up and register the NS2 pin controller
+ * @pdev: platform device being bound
+ *
+ * Maps the three memory resources (mux base 0, mux base 1, pin config
+ * base), initialises the IOMUX log, builds the pin descriptor table from
+ * ns2_pins[] and registers the controller with the pinctrl core.
+ *
+ * Returns 0 on success or a negative errno. All allocations and mappings
+ * are device-managed.
+ */
+static int ns2_pinmux_probe(struct platform_device *pdev)
+{
+	struct ns2_pinctrl *pinctrl;
+	struct resource *res;
+	struct pinctrl_pin_desc *pins;
+	unsigned int num_pins = ARRAY_SIZE(ns2_pins);
+	unsigned int i;
+	int ret;
+
+	pinctrl = devm_kzalloc(&pdev->dev, sizeof(*pinctrl), GFP_KERNEL);
+	if (!pinctrl)
+		return -ENOMEM;
+
+	pinctrl->dev = &pdev->dev;
+	platform_set_drvdata(pdev, pinctrl);
+	spin_lock_init(&pinctrl->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pinctrl->base0 = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pinctrl->base0)) {
+		dev_err(&pdev->dev, "unable to map I/O space\n");
+		return PTR_ERR(pinctrl->base0);
+	}
+
+	/*
+	 * This region is mapped with devm_ioremap_nocache(), which, unlike
+	 * devm_ioremap_resource(), does not validate @res, so a missing
+	 * resource must be caught here before it is dereferenced.
+	 * NOTE(review): presumably the region is not claimed because it is
+	 * shared with another user - confirm.
+	 */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res) {
+		dev_err(&pdev->dev, "unable to get I/O resource\n");
+		return -EINVAL;
+	}
+	pinctrl->base1 = devm_ioremap_nocache(&pdev->dev, res->start,
+					resource_size(res));
+	if (!pinctrl->base1) {
+		dev_err(&pdev->dev, "unable to map I/O space\n");
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	pinctrl->pinconf_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pinctrl->pinconf_base)) {
+		dev_err(&pdev->dev, "unable to map I/O space\n");
+		return PTR_ERR(pinctrl->pinconf_base);
+	}
+
+	ret = ns2_mux_log_init(pinctrl);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to initialize IOMUX log\n");
+		return ret;
+	}
+
+	pins = devm_kcalloc(&pdev->dev, num_pins, sizeof(*pins), GFP_KERNEL);
+	if (!pins)
+		return -ENOMEM;
+
+	for (i = 0; i < num_pins; i++) {
+		pins[i].number = ns2_pins[i].pin;
+		pins[i].name = ns2_pins[i].name;
+		pins[i].drv_data = &ns2_pins[i];
+	}
+
+	pinctrl->groups = ns2_pin_groups;
+	pinctrl->num_groups = ARRAY_SIZE(ns2_pin_groups);
+	pinctrl->functions = ns2_pin_functions;
+	pinctrl->num_functions = ARRAY_SIZE(ns2_pin_functions);
+	ns2_pinctrl_desc.pins = pins;
+	ns2_pinctrl_desc.npins = num_pins;
+
+	/* pinctrl_register() reports failure with an ERR_PTR(), not NULL */
+	pinctrl->pctl = pinctrl_register(&ns2_pinctrl_desc, &pdev->dev,
+			pinctrl);
+	if (IS_ERR(pinctrl->pctl)) {
+		dev_err(&pdev->dev, "unable to register IOMUX pinctrl\n");
+		return PTR_ERR(pinctrl->pctl);
+	}
+
+	return 0;
+}
+
+/* Device-tree compatible strings handled by this driver. */
+static const struct of_device_id ns2_pinmux_of_match[] = {
+	{ .compatible = "brcm,ns2-pinmux", },
+	{ /* sentinel */ }
+};
+
+/* Platform driver glue; no .remove callback is provided. */
+static struct platform_driver ns2_pinmux_driver = {
+	.probe = ns2_pinmux_probe,
+	.driver = {
+		.of_match_table = ns2_pinmux_of_match,
+		.name = "ns2-pinmux",
+	},
+};
+
+/* Register the platform driver; hooked in at arch_initcall level. */
+static int __init ns2_pinmux_init(void)
+{
+	int err = platform_driver_register(&ns2_pinmux_driver);
+
+	return err;
+}
+arch_initcall(ns2_pinmux_init);
diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
index ac90043..a8b37a9 100644
--- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
@@ -363,7 +363,7 @@
 	.get_groups_count = nsp_get_groups_count,
 	.get_group_name = nsp_get_group_name,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int nsp_gpio_set_slew(struct nsp_gpio *chip, unsigned gpio, u16 slew)
@@ -609,7 +609,7 @@
 	pctldesc->npins = gc->ngpio;
 	pctldesc->confops = &nsp_pconf_ops;
 
-	chip->pctl = pinctrl_register(pctldesc, chip->dev, chip);
+	chip->pctl = devm_pinctrl_register(chip->dev, pctldesc, chip);
 	if (IS_ERR(chip->pctl)) {
 		dev_err(chip->dev, "unable to register pinctrl device\n");
 		return PTR_ERR(chip->pctl);
diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c
index 46f2b48..8f0dc02 100644
--- a/drivers/pinctrl/berlin/berlin.c
+++ b/drivers/pinctrl/berlin/berlin.c
@@ -104,7 +104,7 @@
 	.get_groups_count	= &berlin_pinctrl_get_group_count,
 	.get_group_name		= &berlin_pinctrl_get_group_name,
 	.dt_node_to_map		= &berlin_pinctrl_dt_node_to_map,
-	.dt_free_map		= &pinctrl_utils_dt_free_map,
+	.dt_free_map		= &pinctrl_utils_free_map,
 };
 
 static int berlin_pinmux_get_functions_count(struct pinctrl_dev *pctrl_dev)
@@ -316,7 +316,8 @@
 		return ret;
 	}
 
-	pctrl->pctrl_dev = pinctrl_register(&berlin_pctrl_desc, dev, pctrl);
+	pctrl->pctrl_dev = devm_pinctrl_register(dev, &berlin_pctrl_desc,
+						 pctrl);
 	if (IS_ERR(pctrl->pctrl_dev)) {
 		dev_err(dev, "failed to register pinctrl driver\n");
 		return PTR_ERR(pctrl->pctrl_dev);
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index f67a8b7..98d2a1b 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1872,6 +1872,69 @@
 }
 EXPORT_SYMBOL_GPL(pinctrl_unregister);
 
+static void devm_pinctrl_dev_release(struct device *dev, void *res)
+{
+	struct pinctrl_dev **slot = res;	/* devres payload */
+
+	pinctrl_unregister(*slot);
+}
+
+static int devm_pinctrl_dev_match(struct device *dev, void *res, void *data)
+{
+	struct pinctrl_dev **r = res;	/* set by devm_pinctrl_register() */
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;	/* non-zero when this devres holds @data */
+}
+
+/**
+ * devm_pinctrl_register() - Resource managed version of pinctrl_register().
+ * @dev: parent device for this pin controller
+ * @pctldesc: descriptor for this pin controller
+ * @driver_data: private pin controller data for this pin controller
+ *
+ * Return: a valid pinctrl handle on success, or an ERR_PTR() if the devres
+ * allocation or the underlying pinctrl_register() call failed.
+ *
+ * The pinctrl device will be automatically released when the device is unbound.
+ */
+struct pinctrl_dev *devm_pinctrl_register(struct device *dev,
+					  struct pinctrl_desc *pctldesc,
+					  void *driver_data)
+{
+	struct pinctrl_dev *pctldev;
+	struct pinctrl_dev **dr;
+
+	dr = devres_alloc(devm_pinctrl_dev_release, sizeof(*dr), GFP_KERNEL);
+	if (!dr)
+		return ERR_PTR(-ENOMEM);
+
+	pctldev = pinctrl_register(pctldesc, dev, driver_data);
+	if (!IS_ERR(pctldev)) {
+		*dr = pctldev;
+		devres_add(dev, dr);
+	} else {
+		devres_free(dr);
+	}
+
+	return pctldev;
+}
+EXPORT_SYMBOL_GPL(devm_pinctrl_register);
+
+/**
+ * devm_pinctrl_unregister() - Resource managed version of pinctrl_unregister().
+ * @dev: device for which the resource was allocated
+ * @pctldev: the pinctrl device to unregister.
+ */
+void devm_pinctrl_unregister(struct device *dev, struct pinctrl_dev *pctldev)
+{
+	WARN_ON(devres_release(dev, devm_pinctrl_dev_release,
+			       devm_pinctrl_dev_match, pctldev));
+}
+EXPORT_SYMBOL_GPL(devm_pinctrl_unregister);
+
 static int __init pinctrl_init(void)
 {
 	pr_info("initialized pinctrl subsystem\n");
diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
index 9cfa544..47ccfcc 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c
@@ -789,7 +789,7 @@
 	ipctl->info = info;
 	ipctl->dev = info->dev;
 	platform_set_drvdata(pdev, ipctl);
-	ipctl->pctl = pinctrl_register(&imx_pinctrl_desc, &pdev->dev, ipctl);
+	ipctl->pctl = devm_pinctrl_register(&pdev->dev, &imx_pinctrl_desc, ipctl);
 	if (IS_ERR(ipctl->pctl)) {
 		dev_err(&pdev->dev, "could not register IMX pinctrl driver\n");
 		return PTR_ERR(ipctl->pctl);
@@ -799,12 +799,3 @@
 
 	return 0;
 }
-
-int imx_pinctrl_remove(struct platform_device *pdev)
-{
-	struct imx_pinctrl *ipctl = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(ipctl->pctl);
-
-	return 0;
-}
diff --git a/drivers/pinctrl/freescale/pinctrl-imx.h b/drivers/pinctrl/freescale/pinctrl-imx.h
index 3b8bd81..8af8aa2 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.h
+++ b/drivers/pinctrl/freescale/pinctrl-imx.h
@@ -99,5 +99,4 @@
 
 int imx_pinctrl_probe(struct platform_device *pdev,
 			struct imx_pinctrl_soc_info *info);
-int imx_pinctrl_remove(struct platform_device *pdev);
 #endif /* __DRIVERS_PINCTRL_IMX_H */
diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
index acaf84c..b4400cb 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
@@ -635,7 +635,7 @@
 	ipctl->info = info;
 	ipctl->dev = info->dev;
 	platform_set_drvdata(pdev, ipctl);
-	ipctl->pctl = pinctrl_register(pctl_desc, &pdev->dev, ipctl);
+	ipctl->pctl = devm_pinctrl_register(&pdev->dev, pctl_desc, ipctl);
 	if (IS_ERR(ipctl->pctl)) {
 		dev_err(&pdev->dev, "could not register IMX pinctrl driver\n");
 		return PTR_ERR(ipctl->pctl);
@@ -652,12 +652,3 @@
 
 	return 0;
 }
-
-int imx1_pinctrl_core_remove(struct platform_device *pdev)
-{
-	struct imx1_pinctrl *ipctl = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(ipctl->pctl);
-
-	return 0;
-}
diff --git a/drivers/pinctrl/freescale/pinctrl-imx1.c b/drivers/pinctrl/freescale/pinctrl-imx1.c
index d3bacb7..0472345 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx1.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx1.c
@@ -269,7 +269,6 @@
 		.name		= "imx1-pinctrl",
 		.of_match_table	= imx1_pinctrl_of_match,
 	},
-	.remove	= imx1_pinctrl_core_remove,
 };
 module_platform_driver_probe(imx1_pinctrl_driver, imx1_pinctrl_probe);
 
diff --git a/drivers/pinctrl/freescale/pinctrl-imx1.h b/drivers/pinctrl/freescale/pinctrl-imx1.h
index 692a54c..1740743 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx1.h
+++ b/drivers/pinctrl/freescale/pinctrl-imx1.h
@@ -69,5 +69,4 @@
 
 int imx1_pinctrl_core_probe(struct platform_device *pdev,
 			struct imx1_pinctrl_soc_info *info);
-int imx1_pinctrl_core_remove(struct platform_device *pdev);
 #endif /* __DRIVERS_PINCTRL_IMX1_H */
diff --git a/drivers/pinctrl/freescale/pinctrl-imx21.c b/drivers/pinctrl/freescale/pinctrl-imx21.c
index 9d9aca3..aa1221f 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx21.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx21.c
@@ -332,7 +332,6 @@
 		.name		= "imx21-pinctrl",
 		.of_match_table	= imx21_pinctrl_of_match,
 	},
-	.remove	= imx1_pinctrl_core_remove,
 };
 module_platform_driver_probe(imx21_pinctrl_driver, imx21_pinctrl_probe);
 
diff --git a/drivers/pinctrl/freescale/pinctrl-imx25.c b/drivers/pinctrl/freescale/pinctrl-imx25.c
index 293ed43..81ad546 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx25.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx25.c
@@ -331,7 +331,6 @@
 		.of_match_table = of_match_ptr(imx25_pinctrl_of_match),
 	},
 	.probe = imx25_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx25_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx27.c b/drivers/pinctrl/freescale/pinctrl-imx27.c
index a461d588..f828fbb 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx27.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx27.c
@@ -405,7 +405,6 @@
 		.of_match_table = of_match_ptr(imx27_pinctrl_of_match),
 	},
 	.probe = imx27_pinctrl_probe,
-	.remove = imx1_pinctrl_core_remove,
 };
 
 static int __init imx27_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx35.c b/drivers/pinctrl/freescale/pinctrl-imx35.c
index 9109c10..13eb224 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx35.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx35.c
@@ -1021,7 +1021,6 @@
 		.of_match_table = imx35_pinctrl_of_match,
 	},
 	.probe = imx35_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx35_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx50.c b/drivers/pinctrl/freescale/pinctrl-imx50.c
index 8acc4d9..95a36c8 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx50.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx50.c
@@ -408,7 +408,6 @@
 		.of_match_table = of_match_ptr(imx50_pinctrl_of_match),
 	},
 	.probe = imx50_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx50_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx51.c b/drivers/pinctrl/freescale/pinctrl-imx51.c
index 8dec494..0863e527 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx51.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx51.c
@@ -784,7 +784,6 @@
 		.of_match_table = imx51_pinctrl_of_match,
 	},
 	.probe = imx51_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx51_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx53.c b/drivers/pinctrl/freescale/pinctrl-imx53.c
index d39dfd6..64c9cbe 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx53.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx53.c
@@ -471,7 +471,6 @@
 		.of_match_table = imx53_pinctrl_of_match,
 	},
 	.probe = imx53_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx53_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx6dl.c b/drivers/pinctrl/freescale/pinctrl-imx6dl.c
index 5a2cdb0..de17bac 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx6dl.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx6dl.c
@@ -477,7 +477,6 @@
 		.of_match_table = imx6dl_pinctrl_of_match,
 	},
 	.probe = imx6dl_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx6dl_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx6q.c b/drivers/pinctrl/freescale/pinctrl-imx6q.c
index 7d50a36..55cd8a0 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx6q.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx6q.c
@@ -483,7 +483,6 @@
 		.of_match_table = imx6q_pinctrl_of_match,
 	},
 	.probe = imx6q_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx6q_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx6sl.c b/drivers/pinctrl/freescale/pinctrl-imx6sl.c
index e27d17f..bf455b8 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx6sl.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx6sl.c
@@ -384,7 +384,6 @@
 		.of_match_table = imx6sl_pinctrl_of_match,
 	},
 	.probe = imx6sl_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx6sl_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx6sx.c b/drivers/pinctrl/freescale/pinctrl-imx6sx.c
index 117180c..84118c3 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx6sx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx6sx.c
@@ -387,7 +387,6 @@
 		.of_match_table = of_match_ptr(imx6sx_pinctrl_of_match),
 	},
 	.probe = imx6sx_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx6sx_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx6ul.c b/drivers/pinctrl/freescale/pinctrl-imx6ul.c
index 78627c7..c707fdd 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx6ul.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx6ul.c
@@ -303,7 +303,6 @@
 		.of_match_table = of_match_ptr(imx6ul_pinctrl_of_match),
 	},
 	.probe = imx6ul_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx6ul_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-imx7d.c b/drivers/pinctrl/freescale/pinctrl-imx7d.c
index 1c89613..d30d91f 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx7d.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx7d.c
@@ -395,7 +395,6 @@
 		.of_match_table = of_match_ptr(imx7d_pinctrl_of_match),
 	},
 	.probe = imx7d_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init imx7d_pinctrl_init(void)
diff --git a/drivers/pinctrl/freescale/pinctrl-vf610.c b/drivers/pinctrl/freescale/pinctrl-vf610.c
index 587d1ff..6d81be0 100644
--- a/drivers/pinctrl/freescale/pinctrl-vf610.c
+++ b/drivers/pinctrl/freescale/pinctrl-vf610.c
@@ -318,7 +318,6 @@
 		.of_match_table = vf610_pinctrl_of_match,
 	},
 	.probe = vf610_pinctrl_probe,
-	.remove = imx_pinctrl_remove,
 };
 
 static int __init vf610_pinctrl_init(void)
diff --git a/drivers/pinctrl/intel/Kconfig b/drivers/pinctrl/intel/Kconfig
index 4d2efad..1c74e03 100644
--- a/drivers/pinctrl/intel/Kconfig
+++ b/drivers/pinctrl/intel/Kconfig
@@ -6,6 +6,9 @@
 	bool "Intel Baytrail GPIO pin control"
 	depends on GPIOLIB && ACPI
 	select GPIOLIB_IRQCHIP
+	select PINMUX
+	select PINCONF
+	select GENERIC_PINCONF
 	help
 	  driver for memory mapped GPIO functionality on Intel Baytrail
 	  platforms. Supports 3 banks with 102, 28 and 44 gpios.
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 21b79a4..55182fc 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -20,6 +20,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/interrupt.h>
+#include <linux/gpio.h>
 #include <linux/gpio/driver.h>
 #include <linux/acpi.h>
 #include <linux/platform_device.h>
@@ -27,6 +28,9 @@
 #include <linux/io.h>
 #include <linux/pm_runtime.h>
 #include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
 
 /* memory mapped register offsets */
 #define BYT_CONF0_REG		0x000
@@ -34,6 +38,7 @@
 #define BYT_VAL_REG		0x008
 #define BYT_DFT_REG		0x00c
 #define BYT_INT_STAT_REG	0x800
+#define BYT_DEBOUNCE_REG	0x9d0
 
 /* BYT_CONF0_REG register bits */
 #define BYT_IODEN		BIT(31)
@@ -41,6 +46,7 @@
 #define BYT_TRIG_NEG		BIT(26)
 #define BYT_TRIG_POS		BIT(25)
 #define BYT_TRIG_LVL		BIT(24)
+#define BYT_DEBOUNCE_EN		BIT(20)
 #define BYT_PULL_STR_SHIFT	9
 #define BYT_PULL_STR_MASK	(3 << BYT_PULL_STR_SHIFT)
 #define BYT_PULL_STR_2K		(0 << BYT_PULL_STR_SHIFT)
@@ -65,6 +71,16 @@
 				 BYT_PIN_MUX)
 #define BYT_VAL_RESTORE_MASK	(BYT_DIR_MASK | BYT_LEVEL)
 
+/* BYT_DEBOUNCE_REG bits */
+#define BYT_DEBOUNCE_PULSE_MASK		0x7
+#define BYT_DEBOUNCE_PULSE_375US	1
+#define BYT_DEBOUNCE_PULSE_750US	2
+#define BYT_DEBOUNCE_PULSE_1500US	3
+#define BYT_DEBOUNCE_PULSE_3MS		4
+#define BYT_DEBOUNCE_PULSE_6MS		5
+#define BYT_DEBOUNCE_PULSE_12MS		6
+#define BYT_DEBOUNCE_PULSE_24MS		7
+
 #define BYT_NGPIO_SCORE		102
 #define BYT_NGPIO_NCORE		28
 #define BYT_NGPIO_SUS		44
@@ -74,18 +90,227 @@
 #define BYT_SUS_ACPI_UID	"3"
 
 /*
- * Baytrail gpio controller consist of three separate sub-controllers called
- * SCORE, NCORE and SUS. The sub-controllers are identified by their acpi UID.
- *
- * GPIO numbering is _not_ ordered meaning that gpio # 0 in ACPI namespace does
- * _not_ correspond to the first gpio register at controller's gpio base.
- * There is no logic or pattern in mapping gpio numbers to registers (pads) so
- * each sub-controller needs to have its own mapping table
+ * This is the function value most pins have for GPIO muxing. If the value
+ * differs from the default one, it must be explicitly mentioned. Otherwise, the
+ * pin control implementation will set the muxing value to default GPIO if it
+ * does not find a match for the requested function.
  */
+#define BYT_DEFAULT_GPIO_MUX	0
 
-/* score_pins[gpio_nr] = pad_nr */
+struct byt_gpio_pin_context {
+	u32 conf0;
+	u32 val;
+};
 
-static unsigned const score_pins[BYT_NGPIO_SCORE] = {
+struct byt_simple_func_mux {
+	const char *name;
+	unsigned short func;
+};
+
+struct byt_mixed_func_mux {
+	const char *name;
+	const unsigned short *func_values;
+};
+
+struct byt_pingroup {
+	const char *name;
+	const unsigned int *pins;
+	size_t npins;
+	unsigned short has_simple_funcs;
+	union {
+		const struct byt_simple_func_mux *simple_funcs;
+		const struct byt_mixed_func_mux *mixed_funcs;
+	};
+	size_t nfuncs;
+};
+
+struct byt_function {
+	const char *name;
+	const char * const *groups;
+	size_t ngroups;
+};
+
+struct byt_community {
+	unsigned int pin_base;
+	size_t npins;
+	const unsigned int *pad_map;
+	void __iomem *reg_base;
+};
+
+#define SIMPLE_FUNC(n, f)	\
+	{			\
+		.name	= (n),	\
+		.func	= (f),	\
+	}
+#define MIXED_FUNC(n, f)		\
+	{				\
+		.name		= (n),	\
+		.func_values	= (f),	\
+	}
+
+#define PIN_GROUP_SIMPLE(n, p, f)				\
+	{							\
+		.name			= (n),			\
+		.pins			= (p),			\
+		.npins			= ARRAY_SIZE((p)),	\
+		.has_simple_funcs	= 1,		\
+		.simple_funcs		= (f),			\
+		.nfuncs			= ARRAY_SIZE((f)),	\
+	}
+#define PIN_GROUP_MIXED(n, p, f)				\
+	{							\
+		.name			= (n),			\
+		.pins			= (p),			\
+		.npins			= ARRAY_SIZE((p)),	\
+		.has_simple_funcs	= 0,			\
+		.mixed_funcs		= (f),			\
+		.nfuncs			= ARRAY_SIZE((f)),	\
+	}
+
+#define FUNCTION(n, g)					\
+	{						\
+		.name		= (n),			\
+		.groups		= (g),			\
+		.ngroups	= ARRAY_SIZE((g)),	\
+	}
+
+#define COMMUNITY(p, n, map)		\
+	{				\
+		.pin_base	= (p),	\
+		.npins		= (n),	\
+		.pad_map	= (map),\
+	}
+
+struct byt_pinctrl_soc_data {
+	const char *uid;
+	const struct pinctrl_pin_desc *pins;
+	size_t npins;
+	const struct byt_pingroup *groups;
+	size_t ngroups;
+	const struct byt_function *functions;
+	size_t nfunctions;
+	const struct byt_community *communities;
+	size_t ncommunities;
+};
+
+struct byt_gpio {
+	struct gpio_chip chip;
+	struct platform_device *pdev;
+	struct pinctrl_dev *pctl_dev;
+	struct pinctrl_desc pctl_desc;
+	raw_spinlock_t lock;
+	const struct byt_pinctrl_soc_data *soc_data;
+	struct byt_community *communities_copy;
+	struct byt_gpio_pin_context *saved_context;
+};
+
+/* SCORE pins, aka GPIOC_<pin_no> or GPIO_S0_SC[<pin_no>] */
+static const struct pinctrl_pin_desc byt_score_pins[] = {
+	PINCTRL_PIN(0, "SATA_GP0"),
+	PINCTRL_PIN(1, "SATA_GP1"),
+	PINCTRL_PIN(2, "SATA_LED#"),
+	PINCTRL_PIN(3, "PCIE_CLKREQ0"),
+	PINCTRL_PIN(4, "PCIE_CLKREQ1"),
+	PINCTRL_PIN(5, "PCIE_CLKREQ2"),
+	PINCTRL_PIN(6, "PCIE_CLKREQ3"),
+	PINCTRL_PIN(7, "SD3_WP"),
+	PINCTRL_PIN(8, "HDA_RST"),
+	PINCTRL_PIN(9, "HDA_SYNC"),
+	PINCTRL_PIN(10, "HDA_CLK"),
+	PINCTRL_PIN(11, "HDA_SDO"),
+	PINCTRL_PIN(12, "HDA_SDI0"),
+	PINCTRL_PIN(13, "HDA_SDI1"),
+	PINCTRL_PIN(14, "GPIO_S0_SC14"),
+	PINCTRL_PIN(15, "GPIO_S0_SC15"),
+	PINCTRL_PIN(16, "MMC1_CLK"),
+	PINCTRL_PIN(17, "MMC1_D0"),
+	PINCTRL_PIN(18, "MMC1_D1"),
+	PINCTRL_PIN(19, "MMC1_D2"),
+	PINCTRL_PIN(20, "MMC1_D3"),
+	PINCTRL_PIN(21, "MMC1_D4"),
+	PINCTRL_PIN(22, "MMC1_D5"),
+	PINCTRL_PIN(23, "MMC1_D6"),
+	PINCTRL_PIN(24, "MMC1_D7"),
+	PINCTRL_PIN(25, "MMC1_CMD"),
+	PINCTRL_PIN(26, "MMC1_RST"),
+	PINCTRL_PIN(27, "SD2_CLK"),
+	PINCTRL_PIN(28, "SD2_D0"),
+	PINCTRL_PIN(29, "SD2_D1"),
+	PINCTRL_PIN(30, "SD2_D2"),
+	PINCTRL_PIN(31, "SD2_D3_CD"),
+	PINCTRL_PIN(32, "SD2_CMD"),
+	PINCTRL_PIN(33, "SD3_CLK"),
+	PINCTRL_PIN(34, "SD3_D0"),
+	PINCTRL_PIN(35, "SD3_D1"),
+	PINCTRL_PIN(36, "SD3_D2"),
+	PINCTRL_PIN(37, "SD3_D3"),
+	PINCTRL_PIN(38, "SD3_CD"),
+	PINCTRL_PIN(39, "SD3_CMD"),
+	PINCTRL_PIN(40, "SD3_1P8EN"),
+	PINCTRL_PIN(41, "SD3_PWREN#"),
+	PINCTRL_PIN(42, "ILB_LPC_AD0"),
+	PINCTRL_PIN(43, "ILB_LPC_AD1"),
+	PINCTRL_PIN(44, "ILB_LPC_AD2"),
+	PINCTRL_PIN(45, "ILB_LPC_AD3"),
+	PINCTRL_PIN(46, "ILB_LPC_FRAME"),
+	PINCTRL_PIN(47, "ILB_LPC_CLK0"),
+	PINCTRL_PIN(48, "ILB_LPC_CLK1"),
+	PINCTRL_PIN(49, "ILB_LPC_CLKRUN"),
+	PINCTRL_PIN(50, "ILB_LPC_SERIRQ"),
+	PINCTRL_PIN(51, "PCU_SMB_DATA"),
+	PINCTRL_PIN(52, "PCU_SMB_CLK"),
+	PINCTRL_PIN(53, "PCU_SMB_ALERT"),
+	PINCTRL_PIN(54, "ILB_8254_SPKR"),
+	PINCTRL_PIN(55, "GPIO_S0_SC55"),
+	PINCTRL_PIN(56, "GPIO_S0_SC56"),
+	PINCTRL_PIN(57, "GPIO_S0_SC57"),
+	PINCTRL_PIN(58, "GPIO_S0_SC58"),
+	PINCTRL_PIN(59, "GPIO_S0_SC59"),
+	PINCTRL_PIN(60, "GPIO_S0_SC60"),
+	PINCTRL_PIN(61, "GPIO_S0_SC61"),
+	PINCTRL_PIN(62, "LPE_I2S2_CLK"),
+	PINCTRL_PIN(63, "LPE_I2S2_FRM"),
+	PINCTRL_PIN(64, "LPE_I2S2_DATAIN"),
+	PINCTRL_PIN(65, "LPE_I2S2_DATAOUT"),
+	PINCTRL_PIN(66, "SIO_SPI_CS"),
+	PINCTRL_PIN(67, "SIO_SPI_MISO"),
+	PINCTRL_PIN(68, "SIO_SPI_MOSI"),
+	PINCTRL_PIN(69, "SIO_SPI_CLK"),
+	PINCTRL_PIN(70, "SIO_UART1_RXD"),
+	PINCTRL_PIN(71, "SIO_UART1_TXD"),
+	PINCTRL_PIN(72, "SIO_UART1_RTS"),
+	PINCTRL_PIN(73, "SIO_UART1_CTS"),
+	PINCTRL_PIN(74, "SIO_UART2_RXD"),
+	PINCTRL_PIN(75, "SIO_UART2_TXD"),
+	PINCTRL_PIN(76, "SIO_UART2_RTS"),
+	PINCTRL_PIN(77, "SIO_UART2_CTS"),
+	PINCTRL_PIN(78, "SIO_I2C0_DATA"),
+	PINCTRL_PIN(79, "SIO_I2C0_CLK"),
+	PINCTRL_PIN(80, "SIO_I2C1_DATA"),
+	PINCTRL_PIN(81, "SIO_I2C1_CLK"),
+	PINCTRL_PIN(82, "SIO_I2C2_DATA"),
+	PINCTRL_PIN(83, "SIO_I2C2_CLK"),
+	PINCTRL_PIN(84, "SIO_I2C3_DATA"),
+	PINCTRL_PIN(85, "SIO_I2C3_CLK"),
+	PINCTRL_PIN(86, "SIO_I2C4_DATA"),
+	PINCTRL_PIN(87, "SIO_I2C4_CLK"),
+	PINCTRL_PIN(88, "SIO_I2C5_DATA"),
+	PINCTRL_PIN(89, "SIO_I2C5_CLK"),
+	PINCTRL_PIN(90, "SIO_I2C6_DATA"),
+	PINCTRL_PIN(91, "SIO_I2C6_CLK"),
+	PINCTRL_PIN(92, "GPIO_S0_SC92"),
+	PINCTRL_PIN(93, "GPIO_S0_SC93"),
+	PINCTRL_PIN(94, "SIO_PWM0"),
+	PINCTRL_PIN(95, "SIO_PWM1"),
+	PINCTRL_PIN(96, "PMC_PLT_CLK0"),
+	PINCTRL_PIN(97, "PMC_PLT_CLK1"),
+	PINCTRL_PIN(98, "PMC_PLT_CLK2"),
+	PINCTRL_PIN(99, "PMC_PLT_CLK3"),
+	PINCTRL_PIN(100, "PMC_PLT_CLK4"),
+	PINCTRL_PIN(101, "PMC_PLT_CLK5"),
+};
+
+static const unsigned int byt_score_pins_map[BYT_NGPIO_SCORE] = {
 	85, 89, 93, 96, 99, 102, 98, 101, 34, 37,
 	36, 38, 39, 35, 40, 84, 62, 61, 64, 59,
 	54, 56, 60, 55, 63, 57, 51, 50, 53, 47,
@@ -99,13 +324,263 @@
 	97, 100,
 };
 
-static unsigned const ncore_pins[BYT_NGPIO_NCORE] = {
-	19, 18, 17, 20, 21, 22, 24, 25, 23, 16,
-	14, 15, 12, 26, 27, 1, 4, 8, 11, 0,
-	3, 6, 10, 13, 2, 5, 9, 7,
+/* SCORE groups */
+static const unsigned int byt_score_uart1_pins[] = { 70, 71, 72, 73 };
+static const unsigned int byt_score_uart2_pins[] = { 74, 75, 76, 77 };
+static const struct byt_simple_func_mux byt_score_uart_mux[] = {
+	SIMPLE_FUNC("uart", 1),
 };
 
-static unsigned const sus_pins[BYT_NGPIO_SUS] = {
+static const unsigned int byt_score_pwm0_pins[] = { 94 };
+static const unsigned int byt_score_pwm1_pins[] = { 95 };
+static const struct byt_simple_func_mux byt_score_pwm_mux[] = {
+	SIMPLE_FUNC("pwm", 1),
+};
+
+static const unsigned int byt_score_sio_spi_pins[] = { 66, 67, 68, 69 };
+static const struct byt_simple_func_mux byt_score_spi_mux[] = {
+	SIMPLE_FUNC("spi", 1),
+};
+
+static const unsigned int byt_score_i2c5_pins[] = { 88, 89 };
+static const unsigned int byt_score_i2c6_pins[] = { 90, 91 };
+static const unsigned int byt_score_i2c4_pins[] = { 86, 87 };
+static const unsigned int byt_score_i2c3_pins[] = { 84, 85 };
+static const unsigned int byt_score_i2c2_pins[] = { 82, 83 };
+static const unsigned int byt_score_i2c1_pins[] = { 80, 81 };
+static const unsigned int byt_score_i2c0_pins[] = { 78, 79 };
+static const struct byt_simple_func_mux byt_score_i2c_mux[] = {
+	SIMPLE_FUNC("i2c", 1),
+};
+
+static const unsigned int byt_score_ssp0_pins[] = { 8, 9, 10, 11 };
+static const unsigned int byt_score_ssp1_pins[] = { 12, 13, 14, 15 };
+static const unsigned int byt_score_ssp2_pins[] = { 62, 63, 64, 65 };
+static const struct byt_simple_func_mux byt_score_ssp_mux[] = {
+	SIMPLE_FUNC("ssp", 1),
+};
+
+static const unsigned int byt_score_sdcard_pins[] = {
+	7, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+};
+static const unsigned short byt_score_sdcard_mux_values[] = {
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+static const struct byt_mixed_func_mux byt_score_sdcard_mux[] = {
+	MIXED_FUNC("sdcard", byt_score_sdcard_mux_values),
+};
+
+static const unsigned int byt_score_sdio_pins[] = { 27, 28, 29, 30, 31, 32 };
+static const struct byt_simple_func_mux byt_score_sdio_mux[] = {
+	SIMPLE_FUNC("sdio", 1),
+};
+
+static const unsigned int byt_score_emmc_pins[] = {
+	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+};
+static const struct byt_simple_func_mux byt_score_emmc_mux[] = {
+	SIMPLE_FUNC("emmc", 1),
+};
+
+static const unsigned int byt_score_ilb_lpc_pins[] = {
+	42, 43, 44, 45, 46, 47, 48, 49, 50,
+};
+static const struct byt_simple_func_mux byt_score_lpc_mux[] = {
+	SIMPLE_FUNC("lpc", 1),
+};
+
+static const unsigned int byt_score_sata_pins[] = { 0, 1, 2 };
+static const struct byt_simple_func_mux byt_score_sata_mux[] = {
+	SIMPLE_FUNC("sata", 1),
+};
+
+static const unsigned int byt_score_plt_clk0_pins[] = { 96 };	/* PMC_PLT_CLK0 */
+static const unsigned int byt_score_plt_clk1_pins[] = { 97 };	/* PMC_PLT_CLK1 */
+static const unsigned int byt_score_plt_clk2_pins[] = { 98 };	/* PMC_PLT_CLK2 */
+static const unsigned int byt_score_plt_clk3_pins[] = { 99 };	/* PMC_PLT_CLK3 */
+static const unsigned int byt_score_plt_clk4_pins[] = { 100 };	/* PMC_PLT_CLK4 */
+static const unsigned int byt_score_plt_clk5_pins[] = { 101 };	/* PMC_PLT_CLK5 */
+static const struct byt_simple_func_mux byt_score_plt_clk_mux[] = {
+	SIMPLE_FUNC("plt_clk", 1),
+};
+
+static const unsigned int byt_score_smbus_pins[] = { 51, 52, 53 };
+static const struct byt_simple_func_mux byt_score_smbus_mux[] = {
+	SIMPLE_FUNC("smbus", 1),
+};
+
+static const struct byt_pingroup byt_score_groups[] = {
+	PIN_GROUP_SIMPLE("uart1_grp",
+			 byt_score_uart1_pins, byt_score_uart_mux),
+	PIN_GROUP_SIMPLE("uart2_grp",
+			 byt_score_uart2_pins, byt_score_uart_mux),
+	PIN_GROUP_SIMPLE("pwm0_grp",
+			 byt_score_pwm0_pins, byt_score_pwm_mux),
+	PIN_GROUP_SIMPLE("pwm1_grp",
+			 byt_score_pwm1_pins, byt_score_pwm_mux),
+	PIN_GROUP_SIMPLE("ssp2_grp",	/* selected via "ssp", like ssp0/ssp1 */
+			 byt_score_ssp2_pins, byt_score_ssp_mux),
+	PIN_GROUP_SIMPLE("sio_spi_grp",
+			 byt_score_sio_spi_pins, byt_score_spi_mux),
+	PIN_GROUP_SIMPLE("i2c5_grp",
+			 byt_score_i2c5_pins, byt_score_i2c_mux),
+	PIN_GROUP_SIMPLE("i2c6_grp",
+			 byt_score_i2c6_pins, byt_score_i2c_mux),
+	PIN_GROUP_SIMPLE("i2c4_grp",
+			 byt_score_i2c4_pins, byt_score_i2c_mux),
+	PIN_GROUP_SIMPLE("i2c3_grp",
+			 byt_score_i2c3_pins, byt_score_i2c_mux),
+	PIN_GROUP_SIMPLE("i2c2_grp",
+			 byt_score_i2c2_pins, byt_score_i2c_mux),
+	PIN_GROUP_SIMPLE("i2c1_grp",
+			 byt_score_i2c1_pins, byt_score_i2c_mux),
+	PIN_GROUP_SIMPLE("i2c0_grp",
+			 byt_score_i2c0_pins, byt_score_i2c_mux),
+	PIN_GROUP_SIMPLE("ssp0_grp",
+			 byt_score_ssp0_pins, byt_score_ssp_mux),
+	PIN_GROUP_SIMPLE("ssp1_grp",
+			 byt_score_ssp1_pins, byt_score_ssp_mux),
+	PIN_GROUP_MIXED("sdcard_grp",
+			byt_score_sdcard_pins, byt_score_sdcard_mux),
+	PIN_GROUP_SIMPLE("sdio_grp",
+			 byt_score_sdio_pins, byt_score_sdio_mux),
+	PIN_GROUP_SIMPLE("emmc_grp",
+			 byt_score_emmc_pins, byt_score_emmc_mux),
+	PIN_GROUP_SIMPLE("lpc_grp",
+			 byt_score_ilb_lpc_pins, byt_score_lpc_mux),
+	PIN_GROUP_SIMPLE("sata_grp",
+			 byt_score_sata_pins, byt_score_sata_mux),
+	PIN_GROUP_SIMPLE("plt_clk0_grp",
+			 byt_score_plt_clk0_pins, byt_score_plt_clk_mux),
+	PIN_GROUP_SIMPLE("plt_clk1_grp",
+			 byt_score_plt_clk1_pins, byt_score_plt_clk_mux),
+	PIN_GROUP_SIMPLE("plt_clk2_grp",
+			 byt_score_plt_clk2_pins, byt_score_plt_clk_mux),
+	PIN_GROUP_SIMPLE("plt_clk3_grp",
+			 byt_score_plt_clk3_pins, byt_score_plt_clk_mux),
+	PIN_GROUP_SIMPLE("plt_clk4_grp",
+			 byt_score_plt_clk4_pins, byt_score_plt_clk_mux),
+	PIN_GROUP_SIMPLE("plt_clk5_grp",
+			 byt_score_plt_clk5_pins, byt_score_plt_clk_mux),
+	PIN_GROUP_SIMPLE("smbus_grp",
+			 byt_score_smbus_pins, byt_score_smbus_mux),
+};
+
+static const char * const byt_score_uart_groups[] = {
+	"uart1_grp", "uart2_grp",
+};
+static const char * const byt_score_pwm_groups[] = {
+	"pwm0_grp", "pwm1_grp",
+};
+static const char * const byt_score_ssp_groups[] = {
+	"ssp0_grp", "ssp1_grp", "ssp2_grp",
+};
+static const char * const byt_score_spi_groups[] = { "sio_spi_grp" };
+static const char * const byt_score_i2c_groups[] = {
+	"i2c0_grp", "i2c1_grp", "i2c2_grp", "i2c3_grp", "i2c4_grp", "i2c5_grp",
+	"i2c6_grp",
+};
+static const char * const byt_score_sdcard_groups[] = { "sdcard_grp" };
+static const char * const byt_score_sdio_groups[] = { "sdio_grp" };
+static const char * const byt_score_emmc_groups[] = { "emmc_grp" };
+static const char * const byt_score_lpc_groups[] = { "lpc_grp" };
+static const char * const byt_score_sata_groups[] = { "sata_grp" };
+static const char * const byt_score_plt_clk_groups[] = {
+	"plt_clk0_grp", "plt_clk1_grp", "plt_clk2_grp", "plt_clk3_grp",
+	"plt_clk4_grp", "plt_clk5_grp",
+};
+static const char * const byt_score_smbus_groups[] = { "smbus_grp" };
+static const char * const byt_score_gpio_groups[] = {
+	"uart1_grp", "uart2_grp", "pwm0_grp", "pwm1_grp", "ssp0_grp",
+	"ssp1_grp", "ssp2_grp", "sio_spi_grp", "i2c0_grp", "i2c1_grp",
+	"i2c2_grp", "i2c3_grp", "i2c4_grp", "i2c5_grp", "i2c6_grp",
+	"sdcard_grp", "sdio_grp", "emmc_grp", "lpc_grp", "sata_grp",
+	"plt_clk0_grp", "plt_clk1_grp", "plt_clk2_grp", "plt_clk3_grp",
+	"plt_clk4_grp", "plt_clk5_grp", "smbus_grp",
+
+};
+
+static const struct byt_function byt_score_functions[] = {
+	FUNCTION("uart", byt_score_uart_groups),
+	FUNCTION("pwm", byt_score_pwm_groups),
+	FUNCTION("ssp", byt_score_ssp_groups),
+	FUNCTION("spi", byt_score_spi_groups),
+	FUNCTION("i2c", byt_score_i2c_groups),
+	FUNCTION("sdcard", byt_score_sdcard_groups),
+	FUNCTION("sdio", byt_score_sdio_groups),
+	FUNCTION("emmc", byt_score_emmc_groups),
+	FUNCTION("lpc", byt_score_lpc_groups),
+	FUNCTION("sata", byt_score_sata_groups),
+	FUNCTION("plt_clk", byt_score_plt_clk_groups),
+	FUNCTION("smbus", byt_score_smbus_groups),
+	FUNCTION("gpio", byt_score_gpio_groups),
+};
+
+static const struct byt_community byt_score_communities[] = {
+	COMMUNITY(0, BYT_NGPIO_SCORE, byt_score_pins_map),
+};
+
+static const struct byt_pinctrl_soc_data byt_score_soc_data = {
+	.uid		= BYT_SCORE_ACPI_UID,
+	.pins		= byt_score_pins,
+	.npins		= ARRAY_SIZE(byt_score_pins),
+	.groups		= byt_score_groups,
+	.ngroups	= ARRAY_SIZE(byt_score_groups),
+	.functions	= byt_score_functions,
+	.nfunctions	= ARRAY_SIZE(byt_score_functions),
+	.communities	= byt_score_communities,
+	.ncommunities	= ARRAY_SIZE(byt_score_communities),
+};
+
+/* SUS pins, aka GPIOS_<pin_no> or GPIO_S5[<pin_no>] */
+static const struct pinctrl_pin_desc byt_sus_pins[] = {
+	PINCTRL_PIN(0, "GPIO_S50"),
+	PINCTRL_PIN(1, "GPIO_S51"),
+	PINCTRL_PIN(2, "GPIO_S52"),
+	PINCTRL_PIN(3, "GPIO_S53"),
+	PINCTRL_PIN(4, "GPIO_S54"),
+	PINCTRL_PIN(5, "GPIO_S55"),
+	PINCTRL_PIN(6, "GPIO_S56"),
+	PINCTRL_PIN(7, "GPIO_S57"),
+	PINCTRL_PIN(8, "GPIO_S58"),
+	PINCTRL_PIN(9, "GPIO_S59"),
+	PINCTRL_PIN(10, "GPIO_S510"),
+	PINCTRL_PIN(11, "PMC_SUSPWRDNACK"),
+	PINCTRL_PIN(12, "PMC_SUSCLK0"),
+	PINCTRL_PIN(13, "GPIO_S513"),
+	PINCTRL_PIN(14, "USB_ULPI_RST"),
+	PINCTRL_PIN(15, "PMC_WAKE_PCIE0#"),
+	PINCTRL_PIN(16, "PMC_PWRBTN"),
+	PINCTRL_PIN(17, "GPIO_S517"),
+	PINCTRL_PIN(18, "PMC_SUS_STAT"),
+	PINCTRL_PIN(19, "USB_OC0"),
+	PINCTRL_PIN(20, "USB_OC1"),
+	PINCTRL_PIN(21, "PCU_SPI_CS1"),
+	PINCTRL_PIN(22, "GPIO_S522"),
+	PINCTRL_PIN(23, "GPIO_S523"),
+	PINCTRL_PIN(24, "GPIO_S524"),
+	PINCTRL_PIN(25, "GPIO_S525"),
+	PINCTRL_PIN(26, "GPIO_S526"),
+	PINCTRL_PIN(27, "GPIO_S527"),
+	PINCTRL_PIN(28, "GPIO_S528"),
+	PINCTRL_PIN(29, "GPIO_S529"),
+	PINCTRL_PIN(30, "GPIO_S530"),
+	PINCTRL_PIN(31, "USB_ULPI_CLK"),
+	PINCTRL_PIN(32, "USB_ULPI_DATA0"),
+	PINCTRL_PIN(33, "USB_ULPI_DATA1"),
+	PINCTRL_PIN(34, "USB_ULPI_DATA2"),
+	PINCTRL_PIN(35, "USB_ULPI_DATA3"),
+	PINCTRL_PIN(36, "USB_ULPI_DATA4"),
+	PINCTRL_PIN(37, "USB_ULPI_DATA5"),
+	PINCTRL_PIN(38, "USB_ULPI_DATA6"),
+	PINCTRL_PIN(39, "USB_ULPI_DATA7"),
+	PINCTRL_PIN(40, "USB_ULPI_DIR"),
+	PINCTRL_PIN(41, "USB_ULPI_NXT"),
+	PINCTRL_PIN(42, "USB_ULPI_STP"),
+	PINCTRL_PIN(43, "USB_ULPI_REFCLK"),
+};
+
+static const unsigned int byt_sus_pins_map[BYT_NGPIO_SUS] = {
 	29, 33, 30, 31, 32, 34, 36, 35, 38, 37,
 	18, 7, 11, 20, 17, 1, 8, 10, 19, 12,
 	0, 2, 23, 39, 28, 27, 22, 21, 24, 25,
@@ -113,57 +588,357 @@
 	52, 53, 59, 40,
 };
 
-static struct pinctrl_gpio_range byt_ranges[] = {
-	{
-		.name = BYT_SCORE_ACPI_UID, /* match with acpi _UID in probe */
-		.npins = BYT_NGPIO_SCORE,
-		.pins = score_pins,
-	},
-	{
-		.name = BYT_NCORE_ACPI_UID,
-		.npins = BYT_NGPIO_NCORE,
-		.pins = ncore_pins,
-	},
-	{
-		.name = BYT_SUS_ACPI_UID,
-		.npins = BYT_NGPIO_SUS,
-		.pins = sus_pins,
-	},
-	{
-	},
+static const unsigned int byt_sus_usb_over_current_pins[] = { 19, 20 };
+static const struct byt_simple_func_mux byt_sus_usb_oc_mux[] = {
+	SIMPLE_FUNC("usb", 0),
+	SIMPLE_FUNC("gpio", 1),
 };
 
-struct byt_gpio_pin_context {
-	u32 conf0;
-	u32 val;
+static const unsigned int byt_sus_usb_ulpi_pins[] = {
+	14, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+};
+static const unsigned short byt_sus_usb_ulpi_mode_values[] = {
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+static const unsigned short byt_sus_usb_ulpi_gpio_mode_values[] = {
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+static const struct byt_mixed_func_mux byt_sus_usb_ulpi_mux[] = {
+	MIXED_FUNC("usb", byt_sus_usb_ulpi_mode_values),
+	MIXED_FUNC("gpio", byt_sus_usb_ulpi_gpio_mode_values),
 };
 
-struct byt_gpio {
-	struct gpio_chip		chip;
-	struct platform_device		*pdev;
-	raw_spinlock_t			lock;
-	void __iomem			*reg_base;
-	struct pinctrl_gpio_range	*range;
-	struct byt_gpio_pin_context	*saved_context;
+static const unsigned int byt_sus_pcu_spi_pins[] = { 21 };
+static const struct byt_simple_func_mux byt_sus_pcu_spi_mux[] = {
+	SIMPLE_FUNC("spi", 0),
+	SIMPLE_FUNC("gpio", 1),
 };
 
-static void __iomem *byt_gpio_reg(struct gpio_chip *chip, unsigned offset,
-				 int reg)
+static const struct byt_pingroup byt_sus_groups[] = {
+	PIN_GROUP_SIMPLE("usb_oc_grp",
+			byt_sus_usb_over_current_pins, byt_sus_usb_oc_mux),
+	PIN_GROUP_MIXED("usb_ulpi_grp",
+			byt_sus_usb_ulpi_pins, byt_sus_usb_ulpi_mux),
+	PIN_GROUP_SIMPLE("pcu_spi_grp",
+			byt_sus_pcu_spi_pins, byt_sus_pcu_spi_mux),
+};
+
+static const char * const byt_sus_usb_groups[] = {
+	"usb_oc_grp", "usb_ulpi_grp",
+};
+static const char * const byt_sus_spi_groups[] = { "pcu_spi_grp" };
+static const char * const byt_sus_gpio_groups[] = {
+	"usb_oc_grp", "usb_ulpi_grp", "pcu_spi_grp",
+};
+
+static const struct byt_function byt_sus_functions[] = {
+	FUNCTION("usb", byt_sus_usb_groups),
+	FUNCTION("spi", byt_sus_spi_groups),
+	FUNCTION("gpio", byt_sus_gpio_groups),
+};
+
+static const struct byt_community byt_sus_communities[] = {
+	COMMUNITY(0, BYT_NGPIO_SUS, byt_sus_pins_map),
+};
+
+static const struct byt_pinctrl_soc_data byt_sus_soc_data = {
+	.uid		= BYT_SUS_ACPI_UID,
+	.pins		= byt_sus_pins,
+	.npins		= ARRAY_SIZE(byt_sus_pins),
+	.groups		= byt_sus_groups,
+	.ngroups	= ARRAY_SIZE(byt_sus_groups),
+	.functions	= byt_sus_functions,
+	.nfunctions	= ARRAY_SIZE(byt_sus_functions),
+	.communities	= byt_sus_communities,
+	.ncommunities	= ARRAY_SIZE(byt_sus_communities),
+};
+
+static const struct pinctrl_pin_desc byt_ncore_pins[] = {
+	PINCTRL_PIN(0, "GPIO_NCORE0"),
+	PINCTRL_PIN(1, "GPIO_NCORE1"),
+	PINCTRL_PIN(2, "GPIO_NCORE2"),
+	PINCTRL_PIN(3, "GPIO_NCORE3"),
+	PINCTRL_PIN(4, "GPIO_NCORE4"),
+	PINCTRL_PIN(5, "GPIO_NCORE5"),
+	PINCTRL_PIN(6, "GPIO_NCORE6"),
+	PINCTRL_PIN(7, "GPIO_NCORE7"),
+	PINCTRL_PIN(8, "GPIO_NCORE8"),
+	PINCTRL_PIN(9, "GPIO_NCORE9"),
+	PINCTRL_PIN(10, "GPIO_NCORE10"),
+	PINCTRL_PIN(11, "GPIO_NCORE11"),
+	PINCTRL_PIN(12, "GPIO_NCORE12"),
+	PINCTRL_PIN(13, "GPIO_NCORE13"),
+	PINCTRL_PIN(14, "GPIO_NCORE14"),
+	PINCTRL_PIN(15, "GPIO_NCORE15"),
+	PINCTRL_PIN(16, "GPIO_NCORE16"),
+	PINCTRL_PIN(17, "GPIO_NCORE17"),
+	PINCTRL_PIN(18, "GPIO_NCORE18"),
+	PINCTRL_PIN(19, "GPIO_NCORE19"),
+	PINCTRL_PIN(20, "GPIO_NCORE20"),
+	PINCTRL_PIN(21, "GPIO_NCORE21"),
+	PINCTRL_PIN(22, "GPIO_NCORE22"),
+	PINCTRL_PIN(23, "GPIO_NCORE23"),
+	PINCTRL_PIN(24, "GPIO_NCORE24"),
+	PINCTRL_PIN(25, "GPIO_NCORE25"),
+	PINCTRL_PIN(26, "GPIO_NCORE26"),
+	PINCTRL_PIN(27, "GPIO_NCORE27"),
+};
+
+static const unsigned int byt_ncore_pins_map[BYT_NGPIO_NCORE] = {
+	19, 18, 17, 20, 21, 22, 24, 25, 23, 16,
+	14, 15, 12, 26, 27, 1, 4, 8, 11, 0,
+	3, 6, 10, 13, 2, 5, 9, 7,
+};
+
+static const struct byt_community byt_ncore_communities[] = {
+	COMMUNITY(0, BYT_NGPIO_NCORE, byt_ncore_pins_map),
+};
+
+static const struct byt_pinctrl_soc_data byt_ncore_soc_data = {
+	.uid		= BYT_NCORE_ACPI_UID,
+	.pins		= byt_ncore_pins,
+	.npins		= ARRAY_SIZE(byt_ncore_pins),
+	.communities	= byt_ncore_communities,
+	.ncommunities	= ARRAY_SIZE(byt_ncore_communities),
+};
+
+static const struct byt_pinctrl_soc_data *byt_soc_data[] = {
+	&byt_score_soc_data,
+	&byt_sus_soc_data,
+	&byt_ncore_soc_data,
+	NULL,
+};
+
+static struct byt_community *byt_get_community(struct byt_gpio *vg,
+					       unsigned int pin)
 {
-	struct byt_gpio *vg = gpiochip_get_data(chip);
-	u32 reg_offset;
+	struct byt_community *comm;
+	int i;
 
+	for (i = 0; i < vg->soc_data->ncommunities; i++) {
+		comm = vg->communities_copy + i;
+		if (pin < comm->pin_base + comm->npins && pin >= comm->pin_base)
+			return comm;
+	}
+
+	return NULL;
+}
+
+static void __iomem *byt_gpio_reg(struct byt_gpio *vg, unsigned int offset,
+				  int reg)
+{
+	struct byt_community *comm = byt_get_community(vg, offset);
+	u32 reg_offset = 0;
+
+	if (!comm)
+		return NULL;
+
+	offset -= comm->pin_base;
 	if (reg == BYT_INT_STAT_REG)
 		reg_offset = (offset / 32) * 4;
 	else
-		reg_offset = vg->range->pins[offset] * 16;
+		reg_offset = comm->pad_map[offset] * 16;
 
-	return vg->reg_base + reg_offset + reg;
+	return comm->reg_base + reg_offset + reg;
 }
 
-static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned offset)
+static int byt_get_groups_count(struct pinctrl_dev *pctldev)
 {
-	void __iomem *reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG);
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctldev);
+
+	return vg->soc_data->ngroups;
+}
+
+static const char *byt_get_group_name(struct pinctrl_dev *pctldev,
+				      unsigned int selector)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctldev);
+
+	return vg->soc_data->groups[selector].name;
+}
+
+static int byt_get_group_pins(struct pinctrl_dev *pctldev,
+			      unsigned int selector,
+			      const unsigned int **pins,
+			      unsigned int *num_pins)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctldev);
+
+	*pins		= vg->soc_data->groups[selector].pins;
+	*num_pins	= vg->soc_data->groups[selector].npins;
+
+	return 0;
+}
+
+static const struct pinctrl_ops byt_pinctrl_ops = {
+	.get_groups_count	= byt_get_groups_count,
+	.get_group_name		= byt_get_group_name,
+	.get_group_pins		= byt_get_group_pins,
+};
+
+static int byt_get_functions_count(struct pinctrl_dev *pctldev)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctldev);
+
+	return vg->soc_data->nfunctions;
+}
+
+static const char *byt_get_function_name(struct pinctrl_dev *pctldev,
+					 unsigned int selector)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctldev);
+
+	return vg->soc_data->functions[selector].name;
+}
+
+static int byt_get_function_groups(struct pinctrl_dev *pctldev,
+				   unsigned int selector,
+				   const char * const **groups,
+				   unsigned int *num_groups)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctldev);
+
+	*groups		= vg->soc_data->functions[selector].groups;
+	*num_groups	= vg->soc_data->functions[selector].ngroups;
+
+	return 0;
+}
+
+static int byt_get_group_simple_mux(const struct byt_pingroup group,
+				    const char *func_name,
+				    unsigned short *func)
+{
+	int i;
+
+	for (i = 0; i < group.nfuncs; i++) {
+		if (!strcmp(group.simple_funcs[i].name, func_name)) {
+			*func = group.simple_funcs[i].func;
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+static int byt_get_group_mixed_mux(const struct byt_pingroup group,
+				   const char *func_name,
+				   const unsigned short **func)
+{
+	int i;
+
+	for (i = 0; i < group.nfuncs; i++) {
+		if (!strcmp(group.mixed_funcs[i].name, func_name)) {
+			*func = group.mixed_funcs[i].func_values;
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+static void byt_set_group_simple_mux(struct byt_gpio *vg,
+				     const struct byt_pingroup group,
+				     unsigned short func)
+{
+	unsigned long flags;
+	int i;
+
+	raw_spin_lock_irqsave(&vg->lock, flags);
+
+	for (i = 0; i < group.npins; i++) {
+		void __iomem *padcfg0;
+		u32 value;
+
+		padcfg0 = byt_gpio_reg(vg, group.pins[i], BYT_CONF0_REG);
+		if (!padcfg0) {
+			dev_warn(&vg->pdev->dev,
+				 "Group %s, pin %i not muxed (no padcfg0)\n",
+				 group.name, i);
+			continue;
+		}
+
+		value = readl(padcfg0);
+		value &= ~BYT_PIN_MUX;
+		value |= func;
+		writel(value, padcfg0);
+	}
+
+	raw_spin_unlock_irqrestore(&vg->lock, flags);
+}
+
+static void byt_set_group_mixed_mux(struct byt_gpio *vg,
+				    const struct byt_pingroup group,
+				    const unsigned short *func)
+{
+	unsigned long flags;
+	int i;
+
+	raw_spin_lock_irqsave(&vg->lock, flags);
+
+	for (i = 0; i < group.npins; i++) {
+		void __iomem *padcfg0;
+		u32 value;
+
+		padcfg0 = byt_gpio_reg(vg, group.pins[i], BYT_CONF0_REG);
+		if (!padcfg0) {
+			dev_warn(&vg->pdev->dev,
+				 "Group %s, pin %i not muxed (no padcfg0)\n",
+				 group.name, i);
+			continue;
+		}
+
+		value = readl(padcfg0);
+		value &= ~BYT_PIN_MUX;
+		value |= func[i];
+		writel(value, padcfg0);
+	}
+
+	raw_spin_unlock_irqrestore(&vg->lock, flags);
+}
+
+static int byt_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector,
+		       unsigned int group_selector)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctldev);
+	const struct byt_function func = vg->soc_data->functions[func_selector];
+	const struct byt_pingroup group = vg->soc_data->groups[group_selector];
+	const unsigned short *mixed_func;
+	unsigned short simple_func;
+	int ret = 1;
+
+	if (group.has_simple_funcs)
+		ret = byt_get_group_simple_mux(group, func.name, &simple_func);
+	else
+		ret = byt_get_group_mixed_mux(group, func.name, &mixed_func);
+
+	if (ret)
+		byt_set_group_simple_mux(vg, group, BYT_DEFAULT_GPIO_MUX);
+	else if (group.has_simple_funcs)
+		byt_set_group_simple_mux(vg, group, simple_func);
+	else
+		byt_set_group_mixed_mux(vg, group, mixed_func);
+
+	return 0;
+}
+
+static u32 byt_get_gpio_mux(struct byt_gpio *vg, unsigned int offset)
+{
+	/* SCORE pins 92-93: mux value 1 instead of the default 0 */
+	if (!strcmp(vg->soc_data->uid, BYT_SCORE_ACPI_UID) &&
+	    offset >= 92 && offset <= 93)
+		return 1;
+
+	/* SUS pins 11-21: mux value 1 instead of the default 0 */
+	if (!strcmp(vg->soc_data->uid, BYT_SUS_ACPI_UID) &&
+	    offset >= 11 && offset <= 21)
+		return 1;
+
+	return 0;
+}
+
+static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset)
+{
+	void __iomem *reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG);
 	unsigned long flags;
 	u32 value;
 
@@ -174,25 +949,12 @@
 	raw_spin_unlock_irqrestore(&vg->lock, flags);
 }
 
-static u32 byt_get_gpio_mux(struct byt_gpio *vg, unsigned offset)
+static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev,
+				   struct pinctrl_gpio_range *range,
+				   unsigned int offset)
 {
-	/* SCORE pin 92-93 */
-	if (!strcmp(vg->range->name, BYT_SCORE_ACPI_UID) &&
-		offset >= 92 && offset <= 93)
-		return 1;
-
-	/* SUS pin 11-21 */
-	if (!strcmp(vg->range->name, BYT_SUS_ACPI_UID) &&
-		offset >= 11 && offset <= 21)
-		return 1;
-
-	return 0;
-}
-
-static int byt_gpio_request(struct gpio_chip *chip, unsigned offset)
-{
-	struct byt_gpio *vg = gpiochip_get_data(chip);
-	void __iomem *reg = byt_gpio_reg(chip, offset, BYT_CONF0_REG);
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctl_dev);
+	void __iomem *reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG);
 	u32 value, gpio_mux;
 	unsigned long flags;
 
@@ -225,53 +987,318 @@
 	return 0;
 }
 
-static void byt_gpio_free(struct gpio_chip *chip, unsigned offset)
+static void byt_gpio_disable_free(struct pinctrl_dev *pctl_dev,
+				  struct pinctrl_gpio_range *range,
+				  unsigned int offset)
 {
-	struct byt_gpio *vg = gpiochip_get_data(chip);
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctl_dev);
 
 	byt_gpio_clear_triggering(vg, offset);
 	pm_runtime_put(&vg->pdev->dev);
 }
 
-static int byt_irq_type(struct irq_data *d, unsigned type)
+static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev,
+				  struct pinctrl_gpio_range *range,
+				  unsigned int offset,
+				  bool input)
 {
-	struct byt_gpio *vg = gpiochip_get_data(irq_data_get_irq_chip_data(d));
-	u32 offset = irqd_to_hwirq(d);
-	u32 value;
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctl_dev);
+	void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG);
+	void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG);
 	unsigned long flags;
-	void __iomem *reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG);
-
-	if (offset >= vg->chip.ngpio)
-		return -EINVAL;
+	u32 value;
 
 	raw_spin_lock_irqsave(&vg->lock, flags);
-	value = readl(reg);
 
-	WARN(value & BYT_DIRECT_IRQ_EN,
-		"Bad pad config for io mode, force direct_irq_en bit clearing");
-
-	/* For level trigges the BYT_TRIG_POS and BYT_TRIG_NEG bits
-	 * are used to indicate high and low level triggering
-	 */
-	value &= ~(BYT_DIRECT_IRQ_EN | BYT_TRIG_POS | BYT_TRIG_NEG |
-		   BYT_TRIG_LVL);
-
-	writel(value, reg);
-
-	if (type & IRQ_TYPE_EDGE_BOTH)
-		irq_set_handler_locked(d, handle_edge_irq);
-	else if (type & IRQ_TYPE_LEVEL_MASK)
-		irq_set_handler_locked(d, handle_level_irq);
+	value = readl(val_reg);
+	value &= ~BYT_DIR_MASK;
+	if (input)
+		value |= BYT_OUTPUT_EN;
+	else
+		/*
+		 * Before making any direction modifications, do a check if gpio
+		 * is set for direct IRQ.  On baytrail, setting GPIO to output
+		 * does not make sense, so let's at least warn the caller before
+		 * they shoot themselves in the foot.
+		 */
+		WARN(readl(conf_reg) & BYT_DIRECT_IRQ_EN,
+		     "Potential Error: Setting GPIO with direct_irq_en to output");
+	writel(value, val_reg);
 
 	raw_spin_unlock_irqrestore(&vg->lock, flags);
 
 	return 0;
 }
 
+static const struct pinmux_ops byt_pinmux_ops = {
+	.get_functions_count	= byt_get_functions_count,
+	.get_function_name	= byt_get_function_name,
+	.get_function_groups	= byt_get_function_groups,
+	.set_mux		= byt_set_mux,
+	.gpio_request_enable	= byt_gpio_request_enable,
+	.gpio_disable_free	= byt_gpio_disable_free,
+	.gpio_set_direction	= byt_gpio_set_direction,
+};
+
+static void byt_get_pull_strength(u32 reg, u16 *strength)
+{
+	switch (reg & BYT_PULL_STR_MASK) {
+	case BYT_PULL_STR_2K:
+		*strength = 2000;
+		break;
+	case BYT_PULL_STR_10K:
+		*strength = 10000;
+		break;
+	case BYT_PULL_STR_20K:
+		*strength = 20000;
+		break;
+	case BYT_PULL_STR_40K:
+		*strength = 40000;
+		break;
+	}
+}
+
+static int byt_set_pull_strength(u32 *reg, u16 strength)
+{
+	*reg &= ~BYT_PULL_STR_MASK;
+
+	switch (strength) {
+	case 2000:
+		*reg |= BYT_PULL_STR_2K;
+		break;
+	case 10000:
+		*reg |= BYT_PULL_STR_10K;
+		break;
+	case 20000:
+		*reg |= BYT_PULL_STR_20K;
+		break;
+	case 40000:
+		*reg |= BYT_PULL_STR_40K;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset,
+			      unsigned long *config)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctl_dev);
+	enum pin_config_param param = pinconf_to_config_param(*config);
+	void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG);
+	void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG);
+	unsigned long flags;
+	u32 conf, pull, val, debounce;
+	u16 arg = 0;
+
+	raw_spin_lock_irqsave(&vg->lock, flags);
+	conf = readl(conf_reg);
+	pull = conf & BYT_PULL_ASSIGN_MASK;
+	val = readl(val_reg);
+	raw_spin_unlock_irqrestore(&vg->lock, flags);
+
+	switch (param) {
+	case PIN_CONFIG_BIAS_DISABLE:
+		if (pull)
+			return -EINVAL;
+		break;
+	case PIN_CONFIG_BIAS_PULL_DOWN:
+		/* Pull assignment is only applicable in input mode */
+		if ((val & BYT_INPUT_EN) || pull != BYT_PULL_ASSIGN_DOWN)
+			return -EINVAL;
+
+		byt_get_pull_strength(conf, &arg);
+
+		break;
+	case PIN_CONFIG_BIAS_PULL_UP:
+		/* Pull assignment is only applicable in input mode */
+		if ((val & BYT_INPUT_EN) || pull != BYT_PULL_ASSIGN_UP)
+			return -EINVAL;
+
+		byt_get_pull_strength(conf, &arg);
+
+		break;
+	case PIN_CONFIG_INPUT_DEBOUNCE:
+		if (!(conf & BYT_DEBOUNCE_EN))
+			return -EINVAL;
+
+		raw_spin_lock_irqsave(&vg->lock, flags);
+		debounce = readl(byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG));
+		raw_spin_unlock_irqrestore(&vg->lock, flags);
+
+		switch (debounce & BYT_DEBOUNCE_PULSE_MASK) {
+		case BYT_DEBOUNCE_PULSE_375US:
+			arg = 375;
+			break;
+		case BYT_DEBOUNCE_PULSE_750US:
+			arg = 750;
+			break;
+		case BYT_DEBOUNCE_PULSE_1500US:
+			arg = 1500;
+			break;
+		case BYT_DEBOUNCE_PULSE_3MS:
+			arg = 3000;
+			break;
+		case BYT_DEBOUNCE_PULSE_6MS:
+			arg = 6000;
+			break;
+		case BYT_DEBOUNCE_PULSE_12MS:
+			arg = 12000;
+			break;
+		case BYT_DEBOUNCE_PULSE_24MS:
+			arg = 24000;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	*config = pinconf_to_config_packed(param, arg);
+
+	return 0;
+}
+
+static int byt_pin_config_set(struct pinctrl_dev *pctl_dev,
+			      unsigned int offset, unsigned long *configs,
+			      unsigned int num_configs)
+{
+	struct byt_gpio *vg = pinctrl_dev_get_drvdata(pctl_dev);
+	unsigned int param, arg;
+	void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG);
+	void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG);
+	void __iomem *db_reg = byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG);
+	unsigned long flags;
+	u32 conf, val, debounce;
+	int i, ret = 0;
+
+	raw_spin_lock_irqsave(&vg->lock, flags);
+	conf = readl(conf_reg);
+	val = readl(val_reg);
+
+	for (i = 0; i < num_configs; i++) {
+		param = pinconf_to_config_param(configs[i]);
+		arg = pinconf_to_config_argument(configs[i]);
+
+		switch (param) {
+		case PIN_CONFIG_BIAS_DISABLE:
+			conf &= ~BYT_PULL_ASSIGN_MASK;
+			break;
+		case PIN_CONFIG_BIAS_PULL_DOWN:
+			/* Set default strength value in case none is given */
+			if (arg == 1)
+				arg = 2000;
+
+			/*
+			 * Pull assignment is only applicable in input mode. If
+			 * chip is not in input mode, set it and warn about it.
+			 */
+			if (val & BYT_INPUT_EN) {
+				val &= ~BYT_INPUT_EN;
+				writel(val, val_reg);
+				dev_warn(&vg->pdev->dev,
+					 "pin %u forcibly set to input mode\n",
+					 offset);
+			}
+
+			conf &= ~BYT_PULL_ASSIGN_MASK;
+			conf |= BYT_PULL_ASSIGN_DOWN;
+			ret = byt_set_pull_strength(&conf, arg);
+
+			break;
+		case PIN_CONFIG_BIAS_PULL_UP:
+			/* Set default strength value in case none is given */
+			if (arg == 1)
+				arg = 2000;
+
+			/*
+			 * Pull assignment is only applicable in input mode. If
+			 * chip is not in input mode, set it and warn about it.
+			 */
+			if (val & BYT_INPUT_EN) {
+				val &= ~BYT_INPUT_EN;
+				writel(val, val_reg);
+				dev_warn(&vg->pdev->dev,
+					 "pin %u forcibly set to input mode\n",
+					 offset);
+			}
+
+			conf &= ~BYT_PULL_ASSIGN_MASK;
+			conf |= BYT_PULL_ASSIGN_UP;
+			ret = byt_set_pull_strength(&conf, arg);
+
+			break;
+		case PIN_CONFIG_INPUT_DEBOUNCE:
+			/* Pulse width: debounce reg. Enable bit: CONF0. */
+			debounce = readl(db_reg);
+			debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
+			conf |= BYT_DEBOUNCE_EN;
+			switch (arg) {
+			case 375:
+				debounce |= BYT_DEBOUNCE_PULSE_375US;
+				break;
+			case 750:
+				debounce |= BYT_DEBOUNCE_PULSE_750US;
+				break;
+			case 1500:
+				debounce |= BYT_DEBOUNCE_PULSE_1500US;
+				break;
+			case 3000:
+				debounce |= BYT_DEBOUNCE_PULSE_3MS;
+				break;
+			case 6000:
+				debounce |= BYT_DEBOUNCE_PULSE_6MS;
+				break;
+			case 12000:
+				debounce |= BYT_DEBOUNCE_PULSE_12MS;
+				break;
+			case 24000:
+				debounce |= BYT_DEBOUNCE_PULSE_24MS;
+				break;
+			default:
+				ret = -EINVAL;
+			}
+			if (!ret)
+				writel(debounce, db_reg);
+			break;
+		default:
+			ret = -ENOTSUPP;
+		}
+
+		if (ret)
+			break;
+	}
+
+	if (!ret)
+		writel(conf, conf_reg);
+
+	raw_spin_unlock_irqrestore(&vg->lock, flags);
+
+	return ret;
+}
+
+static const struct pinconf_ops byt_pinconf_ops = {
+	.is_generic	= true,
+	.pin_config_get	= byt_pin_config_get,
+	.pin_config_set	= byt_pin_config_set,
+};
+
+static const struct pinctrl_desc byt_pinctrl_desc = {
+	.pctlops	= &byt_pinctrl_ops,
+	.pmxops		= &byt_pinmux_ops,
+	.confops	= &byt_pinconf_ops,
+	.owner		= THIS_MODULE,
+};
+
 static int byt_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
-	void __iomem *reg = byt_gpio_reg(chip, offset, BYT_VAL_REG);
 	struct byt_gpio *vg = gpiochip_get_data(chip);
+	void __iomem *reg = byt_gpio_reg(vg, offset, BYT_VAL_REG);
 	unsigned long flags;
 	u32 val;
 
@@ -285,69 +1312,58 @@
 static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 {
 	struct byt_gpio *vg = gpiochip_get_data(chip);
-	void __iomem *reg = byt_gpio_reg(chip, offset, BYT_VAL_REG);
+	void __iomem *reg = byt_gpio_reg(vg, offset, BYT_VAL_REG);
 	unsigned long flags;
 	u32 old_val;
 
+	if (!reg)
+		return;
+
 	raw_spin_lock_irqsave(&vg->lock, flags);
-
 	old_val = readl(reg);
-
 	if (value)
 		writel(old_val | BYT_LEVEL, reg);
 	else
 		writel(old_val & ~BYT_LEVEL, reg);
-
 	raw_spin_unlock_irqrestore(&vg->lock, flags);
 }
 
-static int byt_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
 {
 	struct byt_gpio *vg = gpiochip_get_data(chip);
-	void __iomem *reg = byt_gpio_reg(chip, offset, BYT_VAL_REG);
+	void __iomem *reg = byt_gpio_reg(vg, offset, BYT_VAL_REG);
 	unsigned long flags;
 	u32 value;
 
+	if (!reg)
+		return -EINVAL;
+
 	raw_spin_lock_irqsave(&vg->lock, flags);
-
-	value = readl(reg) | BYT_DIR_MASK;
-	value &= ~BYT_INPUT_EN;		/* active low */
-	writel(value, reg);
-
+	value = readl(reg);
 	raw_spin_unlock_irqrestore(&vg->lock, flags);
 
-	return 0;
+	if (!(value & BYT_OUTPUT_EN))
+		return GPIOF_DIR_OUT;
+	if (!(value & BYT_INPUT_EN))
+		return GPIOF_DIR_IN;
+
+	return -EINVAL;
+}
+
+static int byt_gpio_direction_input(struct gpio_chip *chip, unsigned int offset)
+{
+	return pinctrl_gpio_direction_input(chip->base + offset);
 }
 
 static int byt_gpio_direction_output(struct gpio_chip *chip,
-				     unsigned gpio, int value)
+				     unsigned int offset, int value)
 {
-	struct byt_gpio *vg = gpiochip_get_data(chip);
-	void __iomem *conf_reg = byt_gpio_reg(chip, gpio, BYT_CONF0_REG);
-	void __iomem *reg = byt_gpio_reg(chip, gpio, BYT_VAL_REG);
-	unsigned long flags;
-	u32 reg_val;
+	int ret = pinctrl_gpio_direction_output(chip->base + offset);
 
-	raw_spin_lock_irqsave(&vg->lock, flags);
+	if (ret)
+		return ret;
 
-	/*
-	 * Before making any direction modifications, do a check if gpio
-	 * is set for direct IRQ.  On baytrail, setting GPIO to output does
-	 * not make sense, so let's at least warn the caller before they shoot
-	 * themselves in the foot.
-	 */
-	WARN(readl(conf_reg) & BYT_DIRECT_IRQ_EN,
-		"Potential Error: Setting GPIO with direct_irq_en to output");
-
-	reg_val = readl(reg) | BYT_DIR_MASK;
-	reg_val &= ~(BYT_OUTPUT_EN | BYT_INPUT_EN);
-
-	if (value)
-		writel(reg_val | BYT_LEVEL, reg);
-	else
-		writel(reg_val & ~BYT_LEVEL, reg);
-
-	raw_spin_unlock_irqrestore(&vg->lock, flags);
+	byt_gpio_set(chip, offset, value);
 
 	return 0;
 }
@@ -356,20 +1372,45 @@
 {
 	struct byt_gpio *vg = gpiochip_get_data(chip);
 	int i;
-	u32 conf0, val, offs;
+	u32 conf0, val;
 
-	for (i = 0; i < vg->chip.ngpio; i++) {
+	for (i = 0; i < vg->soc_data->npins; i++) {
+		const struct byt_community *comm;
 		const char *pull_str = NULL;
 		const char *pull = NULL;
+		void __iomem *reg;
 		unsigned long flags;
 		const char *label;
-		offs = vg->range->pins[i] * 16;
+		unsigned int pin;
 
 		raw_spin_lock_irqsave(&vg->lock, flags);
-		conf0 = readl(vg->reg_base + offs + BYT_CONF0_REG);
-		val = readl(vg->reg_base + offs + BYT_VAL_REG);
+		pin = vg->soc_data->pins[i].number;
+		reg = byt_gpio_reg(vg, pin, BYT_CONF0_REG);
+		if (!reg) {
+			seq_printf(s,
+				   "Could not retrieve pin %i conf0 reg\n",
+				   pin);
+			raw_spin_unlock_irqrestore(&vg->lock, flags);
+			continue;
+		}
+		conf0 = readl(reg);
+
+		reg = byt_gpio_reg(vg, pin, BYT_VAL_REG);
+		if (!reg) {
+			seq_printf(s,
+				   "Could not retrieve pin %i val reg\n", pin);
+			raw_spin_unlock_irqrestore(&vg->lock, flags);
+			continue;
+		}
+		val = readl(reg);
 		raw_spin_unlock_irqrestore(&vg->lock, flags);
 
+		comm = byt_get_community(vg, pin);
+		if (!comm) {
+			seq_printf(s,
+				   "Could not get community for pin %i\n", pin);
+			continue;
+		}
 		label = gpiochip_is_requested(chip, i);
 		if (!label)
 			label = "Unrequested";
@@ -400,12 +1441,12 @@
 
 		seq_printf(s,
 			   " gpio-%-3d (%-20.20s) %s %s %s pad-%-3d offset:0x%03x mux:%d %s%s%s",
-			   i,
+			   pin,
 			   label,
 			   val & BYT_INPUT_EN ? "  " : "in",
 			   val & BYT_OUTPUT_EN ? "   " : "out",
 			   val & BYT_LEVEL ? "hi" : "lo",
-			   vg->range->pins[i], offs,
+			   comm->pad_map[i], comm->pad_map[i] * 32,
 			   conf0 & 0x7,
 			   conf0 & BYT_TRIG_NEG ? " fall" : "     ",
 			   conf0 & BYT_TRIG_POS ? " rise" : "     ",
@@ -423,27 +1464,17 @@
 	}
 }
 
-static void byt_gpio_irq_handler(struct irq_desc *desc)
-{
-	struct irq_data *data = irq_desc_get_irq_data(desc);
-	struct byt_gpio *vg = gpiochip_get_data(irq_desc_get_handler_data(desc));
-	struct irq_chip *chip = irq_data_get_irq_chip(data);
-	u32 base, pin;
-	void __iomem *reg;
-	unsigned long pending;
-	unsigned virq;
-
-	/* check from GPIO controller which pin triggered the interrupt */
-	for (base = 0; base < vg->chip.ngpio; base += 32) {
-		reg = byt_gpio_reg(&vg->chip, base, BYT_INT_STAT_REG);
-		pending = readl(reg);
-		for_each_set_bit(pin, &pending, 32) {
-			virq = irq_find_mapping(vg->chip.irqdomain, base + pin);
-			generic_handle_irq(virq);
-		}
-	}
-	chip->irq_eoi(data);
-}
+static const struct gpio_chip byt_gpio_chip = {
+	.owner			= THIS_MODULE,
+	.request		= gpiochip_generic_request,
+	.free			= gpiochip_generic_free,
+	.get_direction		= byt_gpio_get_direction,
+	.direction_input	= byt_gpio_direction_input,
+	.direction_output	= byt_gpio_direction_output,
+	.get			= byt_gpio_get,
+	.set			= byt_gpio_set,
+	.dbg_show		= byt_gpio_dbg_show,
+};
 
 static void byt_irq_ack(struct irq_data *d)
 {
@@ -452,12 +1483,23 @@
 	unsigned offset = irqd_to_hwirq(d);
 	void __iomem *reg;
 
+	reg = byt_gpio_reg(vg, offset, BYT_INT_STAT_REG);
+	if (!reg)
+		return;
+
 	raw_spin_lock(&vg->lock);
-	reg = byt_gpio_reg(&vg->chip, offset, BYT_INT_STAT_REG);
 	writel(BIT(offset % 32), reg);
 	raw_spin_unlock(&vg->lock);
 }
 
+static void byt_irq_mask(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct byt_gpio *vg = gpiochip_get_data(gc);
+
+	byt_gpio_clear_triggering(vg, irqd_to_hwirq(d));
+}
+
 static void byt_irq_unmask(struct irq_data *d)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -467,7 +1509,9 @@
 	void __iomem *reg;
 	u32 value;
 
-	reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG);
+	reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG);
+	if (!reg)
+		return;
 
 	raw_spin_lock_irqsave(&vg->lock, flags);
 	value = readl(reg);
@@ -493,23 +1537,81 @@
 	raw_spin_unlock_irqrestore(&vg->lock, flags);
 }
 
-static void byt_irq_mask(struct irq_data *d)
+static int byt_irq_type(struct irq_data *d, unsigned int type)
 {
-	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
-	struct byt_gpio *vg = gpiochip_get_data(gc);
+	struct byt_gpio *vg = gpiochip_get_data(irq_data_get_irq_chip_data(d));
+	u32 offset = irqd_to_hwirq(d);
+	u32 value;
+	unsigned long flags;
+	void __iomem *reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG);
 
-	byt_gpio_clear_triggering(vg, irqd_to_hwirq(d));
+	if (!reg || offset >= vg->chip.ngpio)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&vg->lock, flags);
+	value = readl(reg);
+
+	WARN(value & BYT_DIRECT_IRQ_EN,
+	     "Bad pad config for io mode, force direct_irq_en bit clearing");
+
+	/* For level triggers the BYT_TRIG_POS and BYT_TRIG_NEG bits
+	 * are used to indicate high and low level triggering
+	 */
+	value &= ~(BYT_DIRECT_IRQ_EN | BYT_TRIG_POS | BYT_TRIG_NEG |
+		   BYT_TRIG_LVL);
+
+	writel(value, reg);
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		irq_set_handler_locked(d, handle_edge_irq);
+	else if (type & IRQ_TYPE_LEVEL_MASK)
+		irq_set_handler_locked(d, handle_level_irq);
+
+	raw_spin_unlock_irqrestore(&vg->lock, flags);
+
+	return 0;
 }
 
 static struct irq_chip byt_irqchip = {
-	.name = "BYT-GPIO",
-	.irq_ack = byt_irq_ack,
-	.irq_mask = byt_irq_mask,
-	.irq_unmask = byt_irq_unmask,
-	.irq_set_type = byt_irq_type,
-	.flags = IRQCHIP_SKIP_SET_WAKE,
+	.name		= "BYT-GPIO",
+	.irq_ack	= byt_irq_ack,
+	.irq_mask	= byt_irq_mask,
+	.irq_unmask	= byt_irq_unmask,
+	.irq_set_type	= byt_irq_type,
+	.flags		= IRQCHIP_SKIP_SET_WAKE,
 };
 
+static void byt_gpio_irq_handler(struct irq_desc *desc)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+	struct byt_gpio *vg = gpiochip_get_data(
+				irq_desc_get_handler_data(desc));
+	struct irq_chip *chip = irq_data_get_irq_chip(data);
+	u32 base, pin;
+	void __iomem *reg;
+	unsigned long pending;
+	unsigned int virq;
+
+	/* check from GPIO controller which pin triggered the interrupt */
+	for (base = 0; base < vg->chip.ngpio; base += 32) {
+		reg = byt_gpio_reg(vg, base, BYT_INT_STAT_REG);
+
+		if (!reg) {
+			dev_warn(&vg->pdev->dev,
+				 "Pin %i: could not retrieve interrupt status register\n",
+				 base);
+			continue;
+		}
+
+		pending = readl(reg);
+		for_each_set_bit(pin, &pending, 32) {
+			virq = irq_find_mapping(vg->chip.irqdomain, base + pin);
+			generic_handle_irq(virq);
+		}
+	}
+	chip->irq_eoi(data);
+}
+
 static void byt_gpio_irq_init_hw(struct byt_gpio *vg)
 {
 	void __iomem *reg;
@@ -521,8 +1623,18 @@
 	 * do not use direct IRQ mode. This will prevent spurious
 	 * interrupts from misconfigured pins.
 	 */
-	for (i = 0; i < vg->chip.ngpio; i++) {
-		value = readl(byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG));
+	for (i = 0; i < vg->soc_data->npins; i++) {
+		unsigned int pin = vg->soc_data->pins[i].number;
+
+		reg = byt_gpio_reg(vg, pin, BYT_CONF0_REG);
+		if (!reg) {
+			dev_warn(&vg->pdev->dev,
+				 "Pin %i: could not retrieve conf0 register\n",
+				 i);
+			continue;
+		}
+
+		value = readl(reg);
 		if ((value & BYT_PIN_MUX) == byt_get_gpio_mux(vg, i) &&
 		    !(value & BYT_DIRECT_IRQ_EN)) {
 			byt_gpio_clear_triggering(vg, i);
@@ -531,8 +1643,16 @@
 	}
 
 	/* clear interrupt status trigger registers */
-	for (base = 0; base < vg->chip.ngpio; base += 32) {
-		reg = byt_gpio_reg(&vg->chip, base, BYT_INT_STAT_REG);
+	for (base = 0; base < vg->soc_data->npins; base += 32) {
+		reg = byt_gpio_reg(vg, base, BYT_INT_STAT_REG);
+
+		if (!reg) {
+			dev_warn(&vg->pdev->dev,
+				 "Pin %i: could not retrieve irq status reg\n",
+				 base);
+			continue;
+		}
+
 		writel(0xffffffff, reg);
 		/* make sure trigger bits are cleared, if not then a pin
 		   might be misconfigured in bios */
@@ -543,82 +1663,47 @@
 	}
 }
 
-static int byt_gpio_probe(struct platform_device *pdev)
+static int byt_gpio_probe(struct byt_gpio *vg)
 {
-	struct byt_gpio *vg;
 	struct gpio_chip *gc;
-	struct resource *mem_rc, *irq_rc;
-	struct device *dev = &pdev->dev;
-	struct acpi_device *acpi_dev;
-	struct pinctrl_gpio_range *range;
-	acpi_handle handle = ACPI_HANDLE(dev);
+	struct resource *irq_rc;
 	int ret;
 
-	if (acpi_bus_get_device(handle, &acpi_dev))
-		return -ENODEV;
-
-	vg = devm_kzalloc(dev, sizeof(struct byt_gpio), GFP_KERNEL);
-	if (!vg) {
-		dev_err(&pdev->dev, "can't allocate byt_gpio chip data\n");
-		return -ENOMEM;
-	}
-
-	for (range = byt_ranges; range->name; range++) {
-		if (!strcmp(acpi_dev->pnp.unique_id, range->name)) {
-			vg->chip.ngpio = range->npins;
-			vg->range = range;
-			break;
-		}
-	}
-
-	if (!vg->chip.ngpio || !vg->range)
-		return -ENODEV;
-
-	vg->pdev = pdev;
-	platform_set_drvdata(pdev, vg);
-
-	mem_rc = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	vg->reg_base = devm_ioremap_resource(dev, mem_rc);
-	if (IS_ERR(vg->reg_base))
-		return PTR_ERR(vg->reg_base);
-
-	raw_spin_lock_init(&vg->lock);
-
-	gc = &vg->chip;
-	gc->label = dev_name(&pdev->dev);
-	gc->owner = THIS_MODULE;
-	gc->request = byt_gpio_request;
-	gc->free = byt_gpio_free;
-	gc->direction_input = byt_gpio_direction_input;
-	gc->direction_output = byt_gpio_direction_output;
-	gc->get = byt_gpio_get;
-	gc->set = byt_gpio_set;
-	gc->dbg_show = byt_gpio_dbg_show;
-	gc->base = -1;
-	gc->can_sleep = false;
-	gc->parent = dev;
+	/* Set up gpio chip */
+	vg->chip	= byt_gpio_chip;
+	gc		= &vg->chip;
+	gc->label	= dev_name(&vg->pdev->dev);
+	gc->base	= -1;
+	gc->can_sleep	= false;
+	gc->parent	= &vg->pdev->dev;
+	gc->ngpio	= vg->soc_data->npins;
 
 #ifdef CONFIG_PM_SLEEP
-	vg->saved_context = devm_kcalloc(&pdev->dev, gc->ngpio,
+	vg->saved_context = devm_kcalloc(&vg->pdev->dev, gc->ngpio,
 				       sizeof(*vg->saved_context), GFP_KERNEL);
 #endif
-
 	ret = gpiochip_add_data(gc, vg);
 	if (ret) {
-		dev_err(&pdev->dev, "failed adding byt-gpio chip\n");
+		dev_err(&vg->pdev->dev, "failed adding byt-gpio chip\n");
 		return ret;
 	}
 
+	ret = gpiochip_add_pin_range(&vg->chip, dev_name(&vg->pdev->dev),
+				     0, 0, vg->soc_data->npins);
+	if (ret) {
+		dev_err(&vg->pdev->dev, "failed to add GPIO pin range\n");
+		goto fail;
+	}
+
 	/* set up interrupts  */
-	irq_rc = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	irq_rc = platform_get_resource(vg->pdev, IORESOURCE_IRQ, 0);
 	if (irq_rc && irq_rc->start) {
 		byt_gpio_irq_init_hw(vg);
 		ret = gpiochip_irqchip_add(gc, &byt_irqchip, 0,
 					   handle_simple_irq, IRQ_TYPE_NONE);
 		if (ret) {
-			dev_err(dev, "failed to add irqchip\n");
-			gpiochip_remove(gc);
-			return ret;
+			dev_err(&vg->pdev->dev, "failed to add irqchip\n");
+			goto fail;
 		}
 
 		gpiochip_set_chained_irqchip(gc, &byt_irqchip,
@@ -626,7 +1711,120 @@
 					     byt_gpio_irq_handler);
 	}
 
-	pm_runtime_enable(dev);
+	return ret;
+
+fail:
+	gpiochip_remove(&vg->chip);
+
+	return ret;
+}
+
+static int byt_set_soc_data(struct byt_gpio *vg,
+			    const struct byt_pinctrl_soc_data *soc_data)
+{
+	int i;
+
+	vg->soc_data = soc_data;
+	vg->communities_copy = devm_kcalloc(&vg->pdev->dev,
+					    soc_data->ncommunities,
+					    sizeof(*vg->communities_copy),
+					    GFP_KERNEL);
+	if (!vg->communities_copy)
+		return -ENOMEM;
+
+	for (i = 0; i < soc_data->ncommunities; i++) {
+		struct byt_community *comm = vg->communities_copy + i;
+		struct resource *mem_rc;
+
+		*comm = vg->soc_data->communities[i];
+
+		mem_rc = platform_get_resource(vg->pdev, IORESOURCE_MEM, 0);
+		comm->reg_base = devm_ioremap_resource(&vg->pdev->dev, mem_rc);
+		if (IS_ERR(comm->reg_base))
+			return PTR_ERR(comm->reg_base);
+	}
+
+	return 0;
+}
+
+static const struct acpi_device_id byt_gpio_acpi_match[] = {
+	{ "INT33B2", (kernel_ulong_t)byt_soc_data },
+	{ "INT33FC", (kernel_ulong_t)byt_soc_data },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, byt_gpio_acpi_match);
+
+static int byt_pinctrl_probe(struct platform_device *pdev)
+{
+	const struct byt_pinctrl_soc_data *soc_data = NULL;
+	const struct byt_pinctrl_soc_data **soc_table;
+	const struct acpi_device_id *acpi_id;
+	struct acpi_device *acpi_dev;
+	struct byt_gpio *vg;
+	int i, ret;
+
+	acpi_dev = ACPI_COMPANION(&pdev->dev);
+	if (!acpi_dev)
+		return -ENODEV;
+
+	acpi_id = acpi_match_device(byt_gpio_acpi_match, &pdev->dev);
+	if (!acpi_id)
+		return -ENODEV;
+
+	soc_table = (const struct byt_pinctrl_soc_data **)acpi_id->driver_data;
+	for (i = 0; soc_table[i]; i++) {
+		if (!strcmp(acpi_dev->pnp.unique_id, soc_table[i]->uid)) {
+			soc_data = soc_table[i];
+			break;
+		}
+	}
+
+	if (!soc_data)
+		return -ENODEV;
+
+	vg = devm_kzalloc(&pdev->dev, sizeof(*vg), GFP_KERNEL);
+	if (!vg)
+		return -ENOMEM;
+
+	vg->pdev = pdev;
+	/* vg->lock is taken during byt_gpio_probe(), so init it first */
+	raw_spin_lock_init(&vg->lock);
+	ret = byt_set_soc_data(vg, soc_data);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to set soc data\n");
+		return ret;
+	}
+
+	vg->pctl_desc		= byt_pinctrl_desc;
+	vg->pctl_desc.name	= dev_name(&pdev->dev);
+	vg->pctl_desc.pins	= vg->soc_data->pins;
+	vg->pctl_desc.npins	= vg->soc_data->npins;
+
+	vg->pctl_dev = pinctrl_register(&vg->pctl_desc, &pdev->dev, vg);
+	if (IS_ERR(vg->pctl_dev)) {
+		dev_err(&pdev->dev, "failed to register pinctrl driver\n");
+		return PTR_ERR(vg->pctl_dev);
+	}
+
+	ret = byt_gpio_probe(vg);
+	if (ret) {
+		pinctrl_unregister(vg->pctl_dev);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, vg);
+	pm_runtime_enable(&pdev->dev);
+
+	return 0;
+}
+
+static int byt_pinctrl_remove(struct platform_device *pdev)
+{
+	struct byt_gpio *vg = platform_get_drvdata(pdev);
+
+	pm_runtime_disable(&pdev->dev);
+	gpiochip_remove(&vg->chip);
+	pinctrl_unregister(vg->pctl_dev);
 
 	return 0;
 }
@@ -638,15 +1836,22 @@
 	struct byt_gpio *vg = platform_get_drvdata(pdev);
 	int i;
 
-	for (i = 0; i < vg->chip.ngpio; i++) {
+	for (i = 0; i < vg->soc_data->npins; i++) {
 		void __iomem *reg;
 		u32 value;
+		unsigned int pin = vg->soc_data->pins[i].number;
 
-		reg = byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG);
+		reg = byt_gpio_reg(vg, pin, BYT_CONF0_REG);
+		if (!reg) {
+			dev_warn(&vg->pdev->dev,
+				 "Pin %i: could not retrieve conf0 register\n",
+				 i);
+			continue;
+		}
 		value = readl(reg) & BYT_CONF0_RESTORE_MASK;
 		vg->saved_context[i].conf0 = value;
 
-		reg = byt_gpio_reg(&vg->chip, i, BYT_VAL_REG);
+		reg = byt_gpio_reg(vg, pin, BYT_VAL_REG);
 		value = readl(reg) & BYT_VAL_RESTORE_MASK;
 		vg->saved_context[i].val = value;
 	}
@@ -660,11 +1865,18 @@
 	struct byt_gpio *vg = platform_get_drvdata(pdev);
 	int i;
 
-	for (i = 0; i < vg->chip.ngpio; i++) {
+	for (i = 0; i < vg->soc_data->npins; i++) {
 		void __iomem *reg;
 		u32 value;
+		unsigned int pin = vg->soc_data->pins[i].number;
 
-		reg = byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG);
+		reg = byt_gpio_reg(vg, pin, BYT_CONF0_REG);
+		if (!reg) {
+			dev_warn(&vg->pdev->dev,
+				 "Pin %i: could not retrieve conf0 register\n",
+				 i);
+			continue;
+		}
 		value = readl(reg);
 		if ((value & BYT_CONF0_RESTORE_MASK) !=
 		     vg->saved_context[i].conf0) {
@@ -674,7 +1886,7 @@
 			dev_info(dev, "restored pin %d conf0 %#08x", i, value);
 		}
 
-		reg = byt_gpio_reg(&vg->chip, i, BYT_VAL_REG);
+		reg = byt_gpio_reg(vg, pin, BYT_VAL_REG);
 		value = readl(reg);
 		if ((value & BYT_VAL_RESTORE_MASK) !=
 		     vg->saved_context[i].val) {
@@ -712,26 +1924,9 @@
 			   NULL)
 };
 
-static const struct acpi_device_id byt_gpio_acpi_match[] = {
-	{ "INT33B2", 0 },
-	{ "INT33FC", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(acpi, byt_gpio_acpi_match);
-
-static int byt_gpio_remove(struct platform_device *pdev)
-{
-	struct byt_gpio *vg = platform_get_drvdata(pdev);
-
-	pm_runtime_disable(&pdev->dev);
-	gpiochip_remove(&vg->chip);
-
-	return 0;
-}
-
 static struct platform_driver byt_gpio_driver = {
-	.probe          = byt_gpio_probe,
-	.remove         = byt_gpio_remove,
+	.probe          = byt_pinctrl_probe,
+	.remove         = byt_pinctrl_remove,
 	.driver         = {
 		.name   = "byt_gpio",
 		.pm	= &byt_gpio_pm_ops,
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 4251e07..ac4f564 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1526,17 +1526,16 @@
 	pctrl->pctldesc.pins = pctrl->community->pins;
 	pctrl->pctldesc.npins = pctrl->community->npins;
 
-	pctrl->pctldev = pinctrl_register(&pctrl->pctldesc, &pdev->dev, pctrl);
+	pctrl->pctldev = devm_pinctrl_register(&pdev->dev, &pctrl->pctldesc,
+					       pctrl);
 	if (IS_ERR(pctrl->pctldev)) {
 		dev_err(&pdev->dev, "failed to register pinctrl driver\n");
 		return PTR_ERR(pctrl->pctldev);
 	}
 
 	ret = chv_gpio_probe(pctrl, irq);
-	if (ret) {
-		pinctrl_unregister(pctrl->pctldev);
+	if (ret)
 		return ret;
-	}
 
 	platform_set_drvdata(pdev, pctrl);
 
@@ -1548,7 +1547,6 @@
 	struct chv_pinctrl *pctrl = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&pctrl->chip);
-	pinctrl_unregister(pctrl->pctldev);
 
 	return 0;
 }
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 6c2c816f..3584e50 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -1045,17 +1045,16 @@
 	pctrl->pctldesc.pins = pctrl->soc->pins;
 	pctrl->pctldesc.npins = pctrl->soc->npins;
 
-	pctrl->pctldev = pinctrl_register(&pctrl->pctldesc, &pdev->dev, pctrl);
+	pctrl->pctldev = devm_pinctrl_register(&pdev->dev, &pctrl->pctldesc,
+					       pctrl);
 	if (IS_ERR(pctrl->pctldev)) {
 		dev_err(&pdev->dev, "failed to register pinctrl driver\n");
 		return PTR_ERR(pctrl->pctldev);
 	}
 
 	ret = intel_gpio_probe(pctrl, irq);
-	if (ret) {
-		pinctrl_unregister(pctrl->pctldev);
+	if (ret)
 		return ret;
-	}
 
 	platform_set_drvdata(pdev, pctrl);
 
@@ -1068,7 +1067,6 @@
 	struct intel_pinctrl *pctrl = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&pctrl->chip);
-	pinctrl_unregister(pctrl->pctldev);
 
 	return 0;
 }
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index 6ab8c3c..207b13b 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -605,7 +605,7 @@
 		ret = mtk_pctrl_dt_subnode_to_map(pctldev, np, map,
 				&reserved_maps, num_maps);
 		if (ret < 0) {
-			pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+			pinctrl_utils_free_map(pctldev, *map, *num_maps);
 			of_node_put(np);
 			return ret;
 		}
@@ -644,7 +644,7 @@
 
 static const struct pinctrl_ops mtk_pctrl_ops = {
 	.dt_node_to_map		= mtk_pctrl_dt_node_to_map,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 	.get_groups_count	= mtk_pctrl_get_groups_count,
 	.get_group_name		= mtk_pctrl_get_group_name,
 	.get_group_pins		= mtk_pctrl_get_group_pins,
@@ -1396,17 +1396,16 @@
 	pctl->pctl_desc.pmxops = &mtk_pmx_ops;
 	pctl->dev = &pdev->dev;
 
-	pctl->pctl_dev = pinctrl_register(&pctl->pctl_desc, &pdev->dev, pctl);
+	pctl->pctl_dev = devm_pinctrl_register(&pdev->dev, &pctl->pctl_desc,
+					       pctl);
 	if (IS_ERR(pctl->pctl_dev)) {
 		dev_err(&pdev->dev, "couldn't register pinctrl driver\n");
 		return PTR_ERR(pctl->pctl_dev);
 	}
 
 	pctl->chip = devm_kzalloc(&pdev->dev, sizeof(*pctl->chip), GFP_KERNEL);
-	if (!pctl->chip) {
-		ret = -ENOMEM;
-		goto pctrl_error;
-	}
+	if (!pctl->chip)
+		return -ENOMEM;
 
 	*pctl->chip = mtk_gpio_chip;
 	pctl->chip->ngpio = pctl->devdata->npins;
@@ -1415,10 +1414,8 @@
 	pctl->chip->base = -1;
 
 	ret = gpiochip_add_data(pctl->chip, pctl);
-	if (ret) {
-		ret = -EINVAL;
-		goto pctrl_error;
-	}
+	if (ret)
+		return -EINVAL;
 
 	/* Register the GPIO to pin mappings. */
 	ret = gpiochip_add_pin_range(pctl->chip, dev_name(&pdev->dev),
@@ -1496,8 +1493,6 @@
 
 chip_error:
 	gpiochip_remove(pctl->chip);
-pctrl_error:
-	pinctrl_unregister(pctl->pctl_dev);
 	return ret;
 }
 
diff --git a/drivers/pinctrl/meson/Makefile b/drivers/pinctrl/meson/Makefile
index c751d22..24434f1 100644
--- a/drivers/pinctrl/meson/Makefile
+++ b/drivers/pinctrl/meson/Makefile
@@ -1,2 +1,2 @@
-obj-y	+= pinctrl-meson8.o pinctrl-meson8b.o
+obj-y	+= pinctrl-meson8.o pinctrl-meson8b.o pinctrl-meson-gxbb.o
 obj-y	+= pinctrl-meson.o
diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
new file mode 100644
index 0000000..eeabafb
--- /dev/null
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
@@ -0,0 +1,452 @@
+/*
+ * Pin controller and GPIO driver for Amlogic Meson GXBB.
+ *
+ * Copyright (C) 2016 Endless Mobile, Inc.
+ * Author: Carlo Caione <carlo@endlessm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <dt-bindings/gpio/meson-gxbb-gpio.h>
+#include "pinctrl-meson.h"
+
+/*
+ * Pin-number offset of the periphs (EE) domain: its pins are passed EE_OFF
+ * where the AO domain passes 0, and the value matches .pin_base of
+ * meson_gxbb_periphs_domain_data below.
+ */
+#define EE_OFF	14
+
+/*
+ * Pins of the periphs domain.  GPIODV_0..GPIODV_29 must all be listed: bank
+ * "DV" spans that whole range and the domain declares 120 pins.
+ */
+static const struct pinctrl_pin_desc meson_gxbb_periphs_pins[] = {
+	MESON_PIN(GPIOZ_0, EE_OFF),
+	MESON_PIN(GPIOZ_1, EE_OFF),
+	MESON_PIN(GPIOZ_2, EE_OFF),
+	MESON_PIN(GPIOZ_3, EE_OFF),
+	MESON_PIN(GPIOZ_4, EE_OFF),
+	MESON_PIN(GPIOZ_5, EE_OFF),
+	MESON_PIN(GPIOZ_6, EE_OFF),
+	MESON_PIN(GPIOZ_7, EE_OFF),
+	MESON_PIN(GPIOZ_8, EE_OFF),
+	MESON_PIN(GPIOZ_9, EE_OFF),
+	MESON_PIN(GPIOZ_10, EE_OFF),
+	MESON_PIN(GPIOZ_11, EE_OFF),
+	MESON_PIN(GPIOZ_12, EE_OFF),
+	MESON_PIN(GPIOZ_13, EE_OFF),
+	MESON_PIN(GPIOZ_14, EE_OFF),
+	MESON_PIN(GPIOZ_15, EE_OFF),
+
+	MESON_PIN(GPIOH_0, EE_OFF),
+	MESON_PIN(GPIOH_1, EE_OFF),
+	MESON_PIN(GPIOH_2, EE_OFF),
+	MESON_PIN(GPIOH_3, EE_OFF),
+
+	MESON_PIN(BOOT_0, EE_OFF),
+	MESON_PIN(BOOT_1, EE_OFF),
+	MESON_PIN(BOOT_2, EE_OFF),
+	MESON_PIN(BOOT_3, EE_OFF),
+	MESON_PIN(BOOT_4, EE_OFF),
+	MESON_PIN(BOOT_5, EE_OFF),
+	MESON_PIN(BOOT_6, EE_OFF),
+	MESON_PIN(BOOT_7, EE_OFF),
+	MESON_PIN(BOOT_8, EE_OFF),
+	MESON_PIN(BOOT_9, EE_OFF),
+	MESON_PIN(BOOT_10, EE_OFF),
+	MESON_PIN(BOOT_11, EE_OFF),
+	MESON_PIN(BOOT_12, EE_OFF),
+	MESON_PIN(BOOT_13, EE_OFF),
+	MESON_PIN(BOOT_14, EE_OFF),
+	MESON_PIN(BOOT_15, EE_OFF),
+	MESON_PIN(BOOT_16, EE_OFF),
+	MESON_PIN(BOOT_17, EE_OFF),
+
+	MESON_PIN(CARD_0, EE_OFF),
+	MESON_PIN(CARD_1, EE_OFF),
+	MESON_PIN(CARD_2, EE_OFF),
+	MESON_PIN(CARD_3, EE_OFF),
+	MESON_PIN(CARD_4, EE_OFF),
+	MESON_PIN(CARD_5, EE_OFF),
+	MESON_PIN(CARD_6, EE_OFF),
+
+	MESON_PIN(GPIODV_0, EE_OFF),
+	MESON_PIN(GPIODV_1, EE_OFF),
+	MESON_PIN(GPIODV_2, EE_OFF),
+	MESON_PIN(GPIODV_3, EE_OFF),
+	MESON_PIN(GPIODV_4, EE_OFF),
+	MESON_PIN(GPIODV_5, EE_OFF),
+	MESON_PIN(GPIODV_6, EE_OFF),
+	MESON_PIN(GPIODV_7, EE_OFF),
+	MESON_PIN(GPIODV_8, EE_OFF),
+	MESON_PIN(GPIODV_9, EE_OFF),
+	MESON_PIN(GPIODV_10, EE_OFF),
+	MESON_PIN(GPIODV_11, EE_OFF),
+	MESON_PIN(GPIODV_12, EE_OFF),
+	MESON_PIN(GPIODV_13, EE_OFF),
+	MESON_PIN(GPIODV_14, EE_OFF),
+	MESON_PIN(GPIODV_15, EE_OFF),
+	MESON_PIN(GPIODV_16, EE_OFF),
+	MESON_PIN(GPIODV_17, EE_OFF),
+	MESON_PIN(GPIODV_18, EE_OFF),
+	MESON_PIN(GPIODV_19, EE_OFF),
+	MESON_PIN(GPIODV_20, EE_OFF),
+	MESON_PIN(GPIODV_21, EE_OFF),
+	MESON_PIN(GPIODV_22, EE_OFF),
+	MESON_PIN(GPIODV_23, EE_OFF),
+	MESON_PIN(GPIODV_24, EE_OFF),
+	MESON_PIN(GPIODV_25, EE_OFF),
+	MESON_PIN(GPIODV_26, EE_OFF),
+	MESON_PIN(GPIODV_27, EE_OFF),
+	MESON_PIN(GPIODV_28, EE_OFF),
+	MESON_PIN(GPIODV_29, EE_OFF),
+
+	MESON_PIN(GPIOY_0, EE_OFF),
+	MESON_PIN(GPIOY_1, EE_OFF),
+	MESON_PIN(GPIOY_2, EE_OFF),
+	MESON_PIN(GPIOY_3, EE_OFF),
+	MESON_PIN(GPIOY_4, EE_OFF),
+	MESON_PIN(GPIOY_5, EE_OFF),
+	MESON_PIN(GPIOY_6, EE_OFF),
+	MESON_PIN(GPIOY_7, EE_OFF),
+	MESON_PIN(GPIOY_8, EE_OFF),
+	MESON_PIN(GPIOY_9, EE_OFF),
+	MESON_PIN(GPIOY_10, EE_OFF),
+	MESON_PIN(GPIOY_11, EE_OFF),
+	MESON_PIN(GPIOY_12, EE_OFF),
+	MESON_PIN(GPIOY_13, EE_OFF),
+	MESON_PIN(GPIOY_14, EE_OFF),
+	MESON_PIN(GPIOY_15, EE_OFF),
+	MESON_PIN(GPIOY_16, EE_OFF),
+
+	MESON_PIN(GPIOX_0, EE_OFF),
+	MESON_PIN(GPIOX_1, EE_OFF),
+	MESON_PIN(GPIOX_2, EE_OFF),
+	MESON_PIN(GPIOX_3, EE_OFF),
+	MESON_PIN(GPIOX_4, EE_OFF),
+	MESON_PIN(GPIOX_5, EE_OFF),
+	MESON_PIN(GPIOX_6, EE_OFF),
+	MESON_PIN(GPIOX_7, EE_OFF),
+	MESON_PIN(GPIOX_8, EE_OFF),
+	MESON_PIN(GPIOX_9, EE_OFF),
+	MESON_PIN(GPIOX_10, EE_OFF),
+	MESON_PIN(GPIOX_11, EE_OFF),
+	MESON_PIN(GPIOX_12, EE_OFF),
+	MESON_PIN(GPIOX_13, EE_OFF),
+	MESON_PIN(GPIOX_14, EE_OFF),
+	MESON_PIN(GPIOX_15, EE_OFF),
+	MESON_PIN(GPIOX_16, EE_OFF),
+	MESON_PIN(GPIOX_17, EE_OFF),
+	MESON_PIN(GPIOX_18, EE_OFF),
+	MESON_PIN(GPIOX_19, EE_OFF),
+	MESON_PIN(GPIOX_20, EE_OFF),
+	MESON_PIN(GPIOX_21, EE_OFF),
+	MESON_PIN(GPIOX_22, EE_OFF),
+
+	MESON_PIN(GPIOCLK_0, EE_OFF),
+	MESON_PIN(GPIOCLK_1, EE_OFF),
+	MESON_PIN(GPIOCLK_2, EE_OFF),
+	MESON_PIN(GPIOCLK_3, EE_OFF),
+
+	MESON_PIN(GPIO_TEST_N, EE_OFF),
+};
+
+/* Pins of the AO domain, GPIOAO_0..GPIOAO_13, declared with offset 0. */
+static const struct pinctrl_pin_desc meson_gxbb_aobus_pins[] = {
+	MESON_PIN(GPIOAO_0, 0),
+	MESON_PIN(GPIOAO_1, 0),
+	MESON_PIN(GPIOAO_2, 0),
+	MESON_PIN(GPIOAO_3, 0),
+	MESON_PIN(GPIOAO_4, 0),
+	MESON_PIN(GPIOAO_5, 0),
+	MESON_PIN(GPIOAO_6, 0),
+	MESON_PIN(GPIOAO_7, 0),
+	MESON_PIN(GPIOAO_8, 0),
+	MESON_PIN(GPIOAO_9, 0),
+	MESON_PIN(GPIOAO_10, 0),
+	MESON_PIN(GPIOAO_11, 0),
+	MESON_PIN(GPIOAO_12, 0),
+	MESON_PIN(GPIOAO_13, 0),
+};
+
+static const unsigned int uart_tx_ao_a_pins[]	= { PIN(GPIOAO_0, 0) };
+static const unsigned int uart_rx_ao_a_pins[]	= { PIN(GPIOAO_1, 0) };
+static const unsigned int uart_cts_ao_a_pins[]	= { PIN(GPIOAO_2, 0) };
+static const unsigned int uart_rts_ao_a_pins[]	= { PIN(GPIOAO_3, 0) };
+
+/* One GPIO group per periphs pin, in the same order as the pin table. */
+static struct meson_pmx_group meson_gxbb_periphs_groups[] = {
+	GPIO_GROUP(GPIOZ_0, EE_OFF),
+	GPIO_GROUP(GPIOZ_1, EE_OFF),
+	GPIO_GROUP(GPIOZ_2, EE_OFF),
+	GPIO_GROUP(GPIOZ_3, EE_OFF),
+	GPIO_GROUP(GPIOZ_4, EE_OFF),
+	GPIO_GROUP(GPIOZ_5, EE_OFF),
+	GPIO_GROUP(GPIOZ_6, EE_OFF),
+	GPIO_GROUP(GPIOZ_7, EE_OFF),
+	GPIO_GROUP(GPIOZ_8, EE_OFF),
+	GPIO_GROUP(GPIOZ_9, EE_OFF),
+	GPIO_GROUP(GPIOZ_10, EE_OFF),
+	GPIO_GROUP(GPIOZ_11, EE_OFF),
+	GPIO_GROUP(GPIOZ_12, EE_OFF),
+	GPIO_GROUP(GPIOZ_13, EE_OFF),
+	GPIO_GROUP(GPIOZ_14, EE_OFF),
+	GPIO_GROUP(GPIOZ_15, EE_OFF),
+
+	GPIO_GROUP(GPIOH_0, EE_OFF),
+	GPIO_GROUP(GPIOH_1, EE_OFF),
+	GPIO_GROUP(GPIOH_2, EE_OFF),
+	GPIO_GROUP(GPIOH_3, EE_OFF),
+
+	GPIO_GROUP(BOOT_0, EE_OFF),
+	GPIO_GROUP(BOOT_1, EE_OFF),
+	GPIO_GROUP(BOOT_2, EE_OFF),
+	GPIO_GROUP(BOOT_3, EE_OFF),
+	GPIO_GROUP(BOOT_4, EE_OFF),
+	GPIO_GROUP(BOOT_5, EE_OFF),
+	GPIO_GROUP(BOOT_6, EE_OFF),
+	GPIO_GROUP(BOOT_7, EE_OFF),
+	GPIO_GROUP(BOOT_8, EE_OFF),
+	GPIO_GROUP(BOOT_9, EE_OFF),
+	GPIO_GROUP(BOOT_10, EE_OFF),
+	GPIO_GROUP(BOOT_11, EE_OFF),
+	GPIO_GROUP(BOOT_12, EE_OFF),
+	GPIO_GROUP(BOOT_13, EE_OFF),
+	GPIO_GROUP(BOOT_14, EE_OFF),
+	GPIO_GROUP(BOOT_15, EE_OFF),
+	GPIO_GROUP(BOOT_16, EE_OFF),
+	GPIO_GROUP(BOOT_17, EE_OFF),
+
+	GPIO_GROUP(CARD_0, EE_OFF),
+	GPIO_GROUP(CARD_1, EE_OFF),
+	GPIO_GROUP(CARD_2, EE_OFF),
+	GPIO_GROUP(CARD_3, EE_OFF),
+	GPIO_GROUP(CARD_4, EE_OFF),
+	GPIO_GROUP(CARD_5, EE_OFF),
+	GPIO_GROUP(CARD_6, EE_OFF),
+
+	GPIO_GROUP(GPIODV_0, EE_OFF),
+	GPIO_GROUP(GPIODV_1, EE_OFF),
+	GPIO_GROUP(GPIODV_2, EE_OFF),
+	GPIO_GROUP(GPIODV_3, EE_OFF),
+	GPIO_GROUP(GPIODV_4, EE_OFF),
+	GPIO_GROUP(GPIODV_5, EE_OFF),
+	GPIO_GROUP(GPIODV_6, EE_OFF),
+	GPIO_GROUP(GPIODV_7, EE_OFF),
+	GPIO_GROUP(GPIODV_8, EE_OFF),
+	GPIO_GROUP(GPIODV_9, EE_OFF),
+	GPIO_GROUP(GPIODV_10, EE_OFF),
+	GPIO_GROUP(GPIODV_11, EE_OFF),
+	GPIO_GROUP(GPIODV_12, EE_OFF),
+	GPIO_GROUP(GPIODV_13, EE_OFF),
+	GPIO_GROUP(GPIODV_14, EE_OFF),
+	GPIO_GROUP(GPIODV_15, EE_OFF),
+	GPIO_GROUP(GPIODV_16, EE_OFF),
+	GPIO_GROUP(GPIODV_17, EE_OFF),
+	GPIO_GROUP(GPIODV_18, EE_OFF),
+	GPIO_GROUP(GPIODV_19, EE_OFF),
+	GPIO_GROUP(GPIODV_20, EE_OFF),
+	GPIO_GROUP(GPIODV_21, EE_OFF),
+	GPIO_GROUP(GPIODV_22, EE_OFF),
+	GPIO_GROUP(GPIODV_23, EE_OFF),
+	GPIO_GROUP(GPIODV_24, EE_OFF),
+	GPIO_GROUP(GPIODV_25, EE_OFF),
+	GPIO_GROUP(GPIODV_26, EE_OFF),
+	GPIO_GROUP(GPIODV_27, EE_OFF),
+	GPIO_GROUP(GPIODV_28, EE_OFF),
+	GPIO_GROUP(GPIODV_29, EE_OFF),
+
+	GPIO_GROUP(GPIOY_0, EE_OFF),
+	GPIO_GROUP(GPIOY_1, EE_OFF),
+	GPIO_GROUP(GPIOY_2, EE_OFF),
+	GPIO_GROUP(GPIOY_3, EE_OFF),
+	GPIO_GROUP(GPIOY_4, EE_OFF),
+	GPIO_GROUP(GPIOY_5, EE_OFF),
+	GPIO_GROUP(GPIOY_6, EE_OFF),
+	GPIO_GROUP(GPIOY_7, EE_OFF),
+	GPIO_GROUP(GPIOY_8, EE_OFF),
+	GPIO_GROUP(GPIOY_9, EE_OFF),
+	GPIO_GROUP(GPIOY_10, EE_OFF),
+	GPIO_GROUP(GPIOY_11, EE_OFF),
+	GPIO_GROUP(GPIOY_12, EE_OFF),
+	GPIO_GROUP(GPIOY_13, EE_OFF),
+	GPIO_GROUP(GPIOY_14, EE_OFF),
+	GPIO_GROUP(GPIOY_15, EE_OFF),
+	GPIO_GROUP(GPIOY_16, EE_OFF),
+
+	GPIO_GROUP(GPIOX_0, EE_OFF),
+	GPIO_GROUP(GPIOX_1, EE_OFF),
+	GPIO_GROUP(GPIOX_2, EE_OFF),
+	GPIO_GROUP(GPIOX_3, EE_OFF),
+	GPIO_GROUP(GPIOX_4, EE_OFF),
+	GPIO_GROUP(GPIOX_5, EE_OFF),
+	GPIO_GROUP(GPIOX_6, EE_OFF),
+	GPIO_GROUP(GPIOX_7, EE_OFF),
+	GPIO_GROUP(GPIOX_8, EE_OFF),
+	GPIO_GROUP(GPIOX_9, EE_OFF),
+	GPIO_GROUP(GPIOX_10, EE_OFF),
+	GPIO_GROUP(GPIOX_11, EE_OFF),
+	GPIO_GROUP(GPIOX_12, EE_OFF),
+	GPIO_GROUP(GPIOX_13, EE_OFF),
+	GPIO_GROUP(GPIOX_14, EE_OFF),
+	GPIO_GROUP(GPIOX_15, EE_OFF),
+	GPIO_GROUP(GPIOX_16, EE_OFF),
+	GPIO_GROUP(GPIOX_17, EE_OFF),
+	GPIO_GROUP(GPIOX_18, EE_OFF),
+	GPIO_GROUP(GPIOX_19, EE_OFF),
+	GPIO_GROUP(GPIOX_20, EE_OFF),
+	GPIO_GROUP(GPIOX_21, EE_OFF),
+	GPIO_GROUP(GPIOX_22, EE_OFF),
+
+	GPIO_GROUP(GPIOCLK_0, EE_OFF),
+	GPIO_GROUP(GPIOCLK_1, EE_OFF),
+	GPIO_GROUP(GPIOCLK_2, EE_OFF),
+	GPIO_GROUP(GPIOCLK_3, EE_OFF),
+
+	GPIO_GROUP(GPIO_TEST_N, EE_OFF),
+};
+
+/* AO domain: one GPIO group per pin, then the uart_*_ao_a signal groups. */
+static struct meson_pmx_group meson_gxbb_aobus_groups[] = {
+	GPIO_GROUP(GPIOAO_0, 0),
+	GPIO_GROUP(GPIOAO_1, 0),
+	GPIO_GROUP(GPIOAO_2, 0),
+	GPIO_GROUP(GPIOAO_3, 0),
+	GPIO_GROUP(GPIOAO_4, 0),
+	GPIO_GROUP(GPIOAO_5, 0),
+	GPIO_GROUP(GPIOAO_6, 0),
+	GPIO_GROUP(GPIOAO_7, 0),
+	GPIO_GROUP(GPIOAO_8, 0),
+	GPIO_GROUP(GPIOAO_9, 0),
+	GPIO_GROUP(GPIOAO_10, 0),
+	GPIO_GROUP(GPIOAO_11, 0),
+	GPIO_GROUP(GPIOAO_12, 0),
+	GPIO_GROUP(GPIOAO_13, 0),
+
+	/* bank AO */
+	GROUP(uart_tx_ao_a,	0,	12),
+	GROUP(uart_rx_ao_a,	0,	11),
+	GROUP(uart_cts_ao_a,	0,	10),
+	GROUP(uart_rts_ao_a,	0,	9),
+};
+
+/*
+ * Group names of the periphs GPIO function.  Each name is expected to have
+ * a same-named entry in meson_gxbb_periphs_groups.
+ * NOTE(review): the GPIOCLK_0..3 groups are not listed here - confirm
+ * whether that is intentional.
+ */
+static const char * const gpio_periphs_groups[] = {
+	"GPIOZ_0", "GPIOZ_1", "GPIOZ_2", "GPIOZ_3", "GPIOZ_4",
+	"GPIOZ_5", "GPIOZ_6", "GPIOZ_7", "GPIOZ_8", "GPIOZ_9",
+	"GPIOZ_10", "GPIOZ_11", "GPIOZ_12", "GPIOZ_13", "GPIOZ_14",
+	"GPIOZ_15",
+
+	"GPIOH_0", "GPIOH_1", "GPIOH_2", "GPIOH_3",
+
+	"BOOT_0", "BOOT_1", "BOOT_2", "BOOT_3", "BOOT_4",
+	"BOOT_5", "BOOT_6", "BOOT_7", "BOOT_8", "BOOT_9",
+	"BOOT_10", "BOOT_11", "BOOT_12", "BOOT_13", "BOOT_14",
+	"BOOT_15", "BOOT_16", "BOOT_17",
+
+	"CARD_0", "CARD_1", "CARD_2", "CARD_3", "CARD_4",
+	"CARD_5", "CARD_6",
+
+	"GPIODV_0", "GPIODV_1", "GPIODV_2", "GPIODV_3", "GPIODV_4",
+	"GPIODV_5", "GPIODV_6", "GPIODV_7", "GPIODV_8", "GPIODV_9",
+	"GPIODV_10", "GPIODV_11", "GPIODV_12", "GPIODV_13", "GPIODV_14",
+	"GPIODV_15", "GPIODV_16", "GPIODV_17", "GPIODV_18", "GPIODV_19",
+	"GPIODV_20", "GPIODV_21", "GPIODV_22", "GPIODV_23", "GPIODV_24",
+	"GPIODV_25", "GPIODV_26", "GPIODV_27", "GPIODV_28", "GPIODV_29",
+
+	"GPIOY_0", "GPIOY_1", "GPIOY_2", "GPIOY_3", "GPIOY_4",
+	"GPIOY_5", "GPIOY_6", "GPIOY_7", "GPIOY_8", "GPIOY_9",
+	"GPIOY_10", "GPIOY_11", "GPIOY_12", "GPIOY_13", "GPIOY_14",
+	"GPIOY_15", "GPIOY_16",
+
+	"GPIOX_0", "GPIOX_1", "GPIOX_2", "GPIOX_3", "GPIOX_4",
+	"GPIOX_5", "GPIOX_6", "GPIOX_7", "GPIOX_8", "GPIOX_9",
+	"GPIOX_10", "GPIOX_11", "GPIOX_12", "GPIOX_13", "GPIOX_14",
+	"GPIOX_15", "GPIOX_16", "GPIOX_17", "GPIOX_18", "GPIOX_19",
+	"GPIOX_20", "GPIOX_21", "GPIOX_22",
+
+	"GPIO_TEST_N",
+};
+
+static const char * const gpio_aobus_groups[] = {
+	"GPIOAO_0", "GPIOAO_1", "GPIOAO_2", "GPIOAO_3", "GPIOAO_4",
+	"GPIOAO_5", "GPIOAO_6", "GPIOAO_7", "GPIOAO_8", "GPIOAO_9",
+	"GPIOAO_10", "GPIOAO_11", "GPIOAO_12", "GPIOAO_13",
+};
+
+static const char * const uart_ao_groups[] = {
+	"uart_tx_ao_a", "uart_rx_ao_a", "uart_cts_ao_a", "uart_rts_ao_a"
+};
+
+static struct meson_pmx_func meson_gxbb_periphs_functions[] = {
+	FUNCTION(gpio_periphs),
+};
+
+static struct meson_pmx_func meson_gxbb_aobus_functions[] = {
+	FUNCTION(gpio_aobus),
+	FUNCTION(uart_ao),
+};
+
+static struct meson_bank meson_gxbb_periphs_banks[] = {
+	/*   name    first                      last                    pullen  pull    dir     out     in  */
+	BANK("X",    PIN(GPIOX_0, EE_OFF),	PIN(GPIOX_22, EE_OFF),  4,  0,  4,  0,  12, 0,  13, 0,  14, 0),
+	BANK("Y",    PIN(GPIOY_0, EE_OFF),	PIN(GPIOY_16, EE_OFF),  1,  0,  1,  0,  3,  0,  4,  0,  5,  0),
+	BANK("DV",   PIN(GPIODV_0, EE_OFF),	PIN(GPIODV_29, EE_OFF), 0,  0,  0,  0,  0,  0,  1,  0,  2,  0),
+	BANK("H",    PIN(GPIOH_0, EE_OFF),	PIN(GPIOH_3, EE_OFF),   1, 20,  1, 20,  3, 20,  4, 20,  5, 20),
+	BANK("Z",    PIN(GPIOZ_0, EE_OFF),	PIN(GPIOZ_15, EE_OFF),  3,  0,  3,  0,  9,  0,  10, 0, 11,  0),
+	BANK("CARD", PIN(CARD_0, EE_OFF),	PIN(CARD_6, EE_OFF),    2, 20,  2, 20,  6, 20,  7, 20,  8, 20),
+	BANK("BOOT", PIN(BOOT_0, EE_OFF),	PIN(BOOT_17, EE_OFF),   2,  0,  2,  0,  6,  0,  7,  0,  8,  0),
+	BANK("CLK",  PIN(GPIOCLK_0, EE_OFF),	PIN(GPIOCLK_3, EE_OFF), 3, 28,  3, 28,  9, 28, 10, 28, 11, 28),
+};
+
+static struct meson_bank meson_gxbb_aobus_banks[] = {
+	/*   name    first              last               pullen  pull    dir     out     in  */
+	BANK("AO",   PIN(GPIOAO_0, 0),  PIN(GPIOAO_13, 0), 0,  0,  0, 16,  0,  0,  0, 16,  1,  0),
+};
+
+static struct meson_domain_data meson_gxbb_periphs_domain_data = {
+	.name		= "periphs-banks",
+	.banks		= meson_gxbb_periphs_banks,
+	.num_banks	= ARRAY_SIZE(meson_gxbb_periphs_banks),
+	.pin_base	= 14,	/* same value as EE_OFF */
+	.num_pins	= 120,	/* number of entries in meson_gxbb_periphs_pins */
+};
+
+static struct meson_domain_data meson_gxbb_aobus_domain_data = {
+	.name		= "aobus-banks",
+	.banks		= meson_gxbb_aobus_banks,
+	.num_banks	= ARRAY_SIZE(meson_gxbb_aobus_banks),
+	.pin_base	= 0,
+	.num_pins	= 14,	/* number of entries in meson_gxbb_aobus_pins */
+};
+
+struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = {
+	.pins		= meson_gxbb_periphs_pins,
+	.groups		= meson_gxbb_periphs_groups,
+	.funcs		= meson_gxbb_periphs_functions,
+	.domain_data	= &meson_gxbb_periphs_domain_data,
+	.num_pins	= ARRAY_SIZE(meson_gxbb_periphs_pins),
+	.num_groups	= ARRAY_SIZE(meson_gxbb_periphs_groups),
+	.num_funcs	= ARRAY_SIZE(meson_gxbb_periphs_functions),
+};
+
+struct meson_pinctrl_data meson_gxbb_aobus_pinctrl_data = {
+	.pins		= meson_gxbb_aobus_pins,
+	.groups		= meson_gxbb_aobus_groups,
+	.funcs		= meson_gxbb_aobus_functions,
+	.domain_data	= &meson_gxbb_aobus_domain_data,
+	.num_pins	= ARRAY_SIZE(meson_gxbb_aobus_pins),
+	.num_groups	= ARRAY_SIZE(meson_gxbb_aobus_groups),
+	.num_funcs	= ARRAY_SIZE(meson_gxbb_aobus_functions),
+};
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c
index 0bdb8fd..11623c6 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -171,7 +171,7 @@
 	.get_group_name		= meson_get_group_name,
 	.get_group_pins		= meson_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_all,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 	.pin_dbg_show		= meson_pin_dbg_show,
 };
 
@@ -549,6 +549,14 @@
 		.compatible = "amlogic,meson8b-aobus-pinctrl",
 		.data = &meson8b_aobus_pinctrl_data,
 	},
+	{
+		.compatible = "amlogic,meson-gxbb-periphs-pinctrl",
+		.data = &meson_gxbb_periphs_pinctrl_data,
+	},
+	{
+		.compatible = "amlogic,meson-gxbb-aobus-pinctrl",
+		.data = &meson_gxbb_aobus_pinctrl_data,
+	},
 	{ },
 };
 
@@ -713,7 +721,7 @@
 	pc->desc.pins		= pc->data->pins;
 	pc->desc.npins		= pc->data->num_pins;
 
-	pc->pcdev = pinctrl_register(&pc->desc, pc->dev, pc);
+	pc->pcdev = devm_pinctrl_register(pc->dev, &pc->desc, pc);
 	if (IS_ERR(pc->pcdev)) {
 		dev_err(pc->dev, "can't register pinctrl device");
 		return PTR_ERR(pc->pcdev);
diff --git a/drivers/pinctrl/meson/pinctrl-meson.h b/drivers/pinctrl/meson/pinctrl-meson.h
index 9c93e0d..d89442e 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.h
+++ b/drivers/pinctrl/meson/pinctrl-meson.h
@@ -199,3 +199,5 @@
 extern struct meson_pinctrl_data meson8_aobus_pinctrl_data;
 extern struct meson_pinctrl_data meson8b_cbus_pinctrl_data;
 extern struct meson_pinctrl_data meson8b_aobus_pinctrl_data;
+extern struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data;
+extern struct meson_pinctrl_data meson_gxbb_aobus_pinctrl_data;
diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c
index a100bcf..874f2ed 100644
--- a/drivers/pinctrl/meson/pinctrl-meson8b.c
+++ b/drivers/pinctrl/meson/pinctrl-meson8b.c
@@ -564,7 +564,7 @@
 	GROUP(eth_rx_clk,	6,	3),
 	GROUP(eth_txd0_1,	6,	4),
 	GROUP(eth_txd1_1,	6,	5),
-	GROUP(eth_tx_en,	6,	0),
+	GROUP(eth_tx_en,	6,	6),
 	GROUP(eth_ref_clk,	6,	8),
 	GROUP(eth_mdc,		6,	9),
 	GROUP(eth_mdio_en,	6,	10),
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-370.c b/drivers/pinctrl/mvebu/pinctrl-armada-370.c
index 73dc1bc..9cc1cc3 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-370.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-370.c
@@ -417,18 +417,12 @@
 	return mvebu_pinctrl_probe(pdev);
 }
 
-static int armada_370_pinctrl_remove(struct platform_device *pdev)
-{
-	return mvebu_pinctrl_remove(pdev);
-}
-
 static struct platform_driver armada_370_pinctrl_driver = {
 	.driver = {
 		.name = "armada-370-pinctrl",
 		.of_match_table = armada_370_pinctrl_of_match,
 	},
 	.probe = armada_370_pinctrl_probe,
-	.remove = armada_370_pinctrl_remove,
 };
 
 module_platform_driver(armada_370_pinctrl_driver);
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-375.c b/drivers/pinctrl/mvebu/pinctrl-armada-375.c
index 54e9fbd..0706514 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-375.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-375.c
@@ -435,18 +435,12 @@
 	return mvebu_pinctrl_probe(pdev);
 }
 
-static int armada_375_pinctrl_remove(struct platform_device *pdev)
-{
-	return mvebu_pinctrl_remove(pdev);
-}
-
 static struct platform_driver armada_375_pinctrl_driver = {
 	.driver = {
 		.name = "armada-375-pinctrl",
 		.of_match_table = of_match_ptr(armada_375_pinctrl_of_match),
 	},
 	.probe = armada_375_pinctrl_probe,
-	.remove = armada_375_pinctrl_remove,
 };
 
 module_platform_driver(armada_375_pinctrl_driver);
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c
index 6ec82c6..4e84c8e 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c
@@ -446,18 +446,12 @@
 	return mvebu_pinctrl_probe(pdev);
 }
 
-static int armada_38x_pinctrl_remove(struct platform_device *pdev)
-{
-	return mvebu_pinctrl_remove(pdev);
-}
-
 static struct platform_driver armada_38x_pinctrl_driver = {
 	.driver = {
 		.name = "armada-38x-pinctrl",
 		.of_match_table = of_match_ptr(armada_38x_pinctrl_of_match),
 	},
 	.probe = armada_38x_pinctrl_probe,
-	.remove = armada_38x_pinctrl_remove,
 };
 
 module_platform_driver(armada_38x_pinctrl_driver);
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c
index fcfe9b4..e288f8b 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c
@@ -428,18 +428,12 @@
 	return mvebu_pinctrl_probe(pdev);
 }
 
-static int armada_39x_pinctrl_remove(struct platform_device *pdev)
-{
-	return mvebu_pinctrl_remove(pdev);
-}
-
 static struct platform_driver armada_39x_pinctrl_driver = {
 	.driver = {
 		.name = "armada-39x-pinctrl",
 		.of_match_table = of_match_ptr(armada_39x_pinctrl_of_match),
 	},
 	.probe = armada_39x_pinctrl_probe,
-	.remove = armada_39x_pinctrl_remove,
 };
 
 module_platform_driver(armada_39x_pinctrl_driver);
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c
index bf70e09..e4ea71a 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c
@@ -502,18 +502,12 @@
 	return mvebu_pinctrl_probe(pdev);
 }
 
-static int armada_xp_pinctrl_remove(struct platform_device *pdev)
-{
-	return mvebu_pinctrl_remove(pdev);
-}
-
 static struct platform_driver armada_xp_pinctrl_driver = {
 	.driver = {
 		.name = "armada-xp-pinctrl",
 		.of_match_table = armada_xp_pinctrl_of_match,
 	},
 	.probe = armada_xp_pinctrl_probe,
-	.remove = armada_xp_pinctrl_remove,
 	.suspend = armada_xp_pinctrl_suspend,
 	.resume = armada_xp_pinctrl_resume,
 };
diff --git a/drivers/pinctrl/mvebu/pinctrl-dove.c b/drivers/pinctrl/mvebu/pinctrl-dove.c
index 95bfd06..f93ae0d 100644
--- a/drivers/pinctrl/mvebu/pinctrl-dove.c
+++ b/drivers/pinctrl/mvebu/pinctrl-dove.c
@@ -840,12 +840,9 @@
 
 static int dove_pinctrl_remove(struct platform_device *pdev)
 {
-	int ret;
-
-	ret = mvebu_pinctrl_remove(pdev);
 	if (!IS_ERR(clk))
 		clk_disable_unprepare(clk);
-	return ret;
+	return 0;
 }
 
 static struct platform_driver dove_pinctrl_driver = {
diff --git a/drivers/pinctrl/mvebu/pinctrl-kirkwood.c b/drivers/pinctrl/mvebu/pinctrl-kirkwood.c
index 0f07dc55..a78e9a4 100644
--- a/drivers/pinctrl/mvebu/pinctrl-kirkwood.c
+++ b/drivers/pinctrl/mvebu/pinctrl-kirkwood.c
@@ -481,18 +481,12 @@
 	return mvebu_pinctrl_probe(pdev);
 }
 
-static int kirkwood_pinctrl_remove(struct platform_device *pdev)
-{
-	return mvebu_pinctrl_remove(pdev);
-}
-
 static struct platform_driver kirkwood_pinctrl_driver = {
 	.driver = {
 		.name = "kirkwood-pinctrl",
 		.of_match_table = kirkwood_pinctrl_of_match,
 	},
 	.probe = kirkwood_pinctrl_probe,
-	.remove = kirkwood_pinctrl_remove,
 };
 
 module_platform_driver(kirkwood_pinctrl_driver);
diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
index 3ef798f..b6ec6db 100644
--- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c
+++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
@@ -711,7 +711,7 @@
 		return ret;
 	}
 
-	pctl->pctldev = pinctrl_register(&pctl->desc, &pdev->dev, pctl);
+	pctl->pctldev = devm_pinctrl_register(&pdev->dev, &pctl->desc, pctl);
 	if (IS_ERR(pctl->pctldev)) {
 		dev_err(&pdev->dev, "unable to register pinctrl driver\n");
 		return PTR_ERR(pctl->pctldev);
@@ -725,10 +725,3 @@
 
 	return 0;
 }
-
-int mvebu_pinctrl_remove(struct platform_device *pdev)
-{
-	struct mvebu_pinctrl *pctl = platform_get_drvdata(pdev);
-	pinctrl_unregister(pctl->pctldev);
-	return 0;
-}
diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.h b/drivers/pinctrl/mvebu/pinctrl-mvebu.h
index 65a98e6..b75a5f4 100644
--- a/drivers/pinctrl/mvebu/pinctrl-mvebu.h
+++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.h
@@ -202,6 +202,5 @@
 }
 
 int mvebu_pinctrl_probe(struct platform_device *pdev);
-int mvebu_pinctrl_remove(struct platform_device *pdev);
 
 #endif
diff --git a/drivers/pinctrl/mvebu/pinctrl-orion.c b/drivers/pinctrl/mvebu/pinctrl-orion.c
index 3b7122d..345c3df 100644
--- a/drivers/pinctrl/mvebu/pinctrl-orion.c
+++ b/drivers/pinctrl/mvebu/pinctrl-orion.c
@@ -239,18 +239,12 @@
 	return mvebu_pinctrl_probe(pdev);
 }
 
-static int orion_pinctrl_remove(struct platform_device *pdev)
-{
-	return mvebu_pinctrl_remove(pdev);
-}
-
 static struct platform_driver orion_pinctrl_driver = {
 	.driver = {
 		.name = "orion-pinctrl",
 		.of_match_table = of_match_ptr(orion_pinctrl_of_match),
 	},
 	.probe = orion_pinctrl_probe,
-	.remove = orion_pinctrl_remove,
 };
 
 module_platform_driver(orion_pinctrl_driver);
diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c
index 1f7469c..7d343c2 100644
--- a/drivers/pinctrl/nomadik/pinctrl-abx500.c
+++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c
@@ -937,7 +937,7 @@
 		ret = abx500_dt_subnode_to_map(pctldev, np, map,
 				&reserved_maps, num_maps);
 		if (ret < 0) {
-			pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+			pinctrl_utils_free_map(pctldev, *map, *num_maps);
 			return ret;
 		}
 	}
@@ -951,7 +951,7 @@
 	.get_group_pins = abx500_get_group_pins,
 	.pin_dbg_show = abx500_pin_dbg_show,
 	.dt_node_to_map = abx500_dt_node_to_map,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int abx500_pin_config_get(struct pinctrl_dev *pctldev,
@@ -1212,7 +1212,8 @@
 
 	abx500_pinctrl_desc.pins = pct->soc->pins;
 	abx500_pinctrl_desc.npins = pct->soc->npins;
-	pct->pctldev = pinctrl_register(&abx500_pinctrl_desc, &pdev->dev, pct);
+	pct->pctldev = devm_pinctrl_register(&pdev->dev, &abx500_pinctrl_desc,
+					     pct);
 	if (IS_ERR(pct->pctldev)) {
 		dev_err(&pdev->dev,
 			"could not register abx500 pinctrl driver\n");
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index c8969dd..ccbfc32 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
+#include <linux/bitops.h>
 #include <linux/pinctrl/machine.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/pinctrl/pinmux.h>
@@ -292,15 +293,14 @@
 static void __nmk_gpio_set_mode(struct nmk_gpio_chip *nmk_chip,
 				unsigned offset, int gpio_mode)
 {
-	u32 bit = 1 << offset;
 	u32 afunc, bfunc;
 
-	afunc = readl(nmk_chip->addr + NMK_GPIO_AFSLA) & ~bit;
-	bfunc = readl(nmk_chip->addr + NMK_GPIO_AFSLB) & ~bit;
+	afunc = readl(nmk_chip->addr + NMK_GPIO_AFSLA) & ~BIT(offset);
+	bfunc = readl(nmk_chip->addr + NMK_GPIO_AFSLB) & ~BIT(offset);
 	if (gpio_mode & NMK_GPIO_ALT_A)
-		afunc |= bit;
+		afunc |= BIT(offset);
 	if (gpio_mode & NMK_GPIO_ALT_B)
-		bfunc |= bit;
+		bfunc |= BIT(offset);
 	writel(afunc, nmk_chip->addr + NMK_GPIO_AFSLA);
 	writel(bfunc, nmk_chip->addr + NMK_GPIO_AFSLB);
 }
@@ -308,55 +308,52 @@
 static void __nmk_gpio_set_slpm(struct nmk_gpio_chip *nmk_chip,
 				unsigned offset, enum nmk_gpio_slpm mode)
 {
-	u32 bit = 1 << offset;
 	u32 slpm;
 
 	slpm = readl(nmk_chip->addr + NMK_GPIO_SLPC);
 	if (mode == NMK_GPIO_SLPM_NOCHANGE)
-		slpm |= bit;
+		slpm |= BIT(offset);
 	else
-		slpm &= ~bit;
+		slpm &= ~BIT(offset);
 	writel(slpm, nmk_chip->addr + NMK_GPIO_SLPC);
 }
 
 static void __nmk_gpio_set_pull(struct nmk_gpio_chip *nmk_chip,
 				unsigned offset, enum nmk_gpio_pull pull)
 {
-	u32 bit = 1 << offset;
 	u32 pdis;
 
 	pdis = readl(nmk_chip->addr + NMK_GPIO_PDIS);
 	if (pull == NMK_GPIO_PULL_NONE) {
-		pdis |= bit;
-		nmk_chip->pull_up &= ~bit;
+		pdis |= BIT(offset);
+		nmk_chip->pull_up &= ~BIT(offset);
 	} else {
-		pdis &= ~bit;
+		pdis &= ~BIT(offset);
 	}
 
 	writel(pdis, nmk_chip->addr + NMK_GPIO_PDIS);
 
 	if (pull == NMK_GPIO_PULL_UP) {
-		nmk_chip->pull_up |= bit;
-		writel(bit, nmk_chip->addr + NMK_GPIO_DATS);
+		nmk_chip->pull_up |= BIT(offset);
+		writel(BIT(offset), nmk_chip->addr + NMK_GPIO_DATS);
 	} else if (pull == NMK_GPIO_PULL_DOWN) {
-		nmk_chip->pull_up &= ~bit;
-		writel(bit, nmk_chip->addr + NMK_GPIO_DATC);
+		nmk_chip->pull_up &= ~BIT(offset);
+		writel(BIT(offset), nmk_chip->addr + NMK_GPIO_DATC);
 	}
 }
 
 static void __nmk_gpio_set_lowemi(struct nmk_gpio_chip *nmk_chip,
 				  unsigned offset, bool lowemi)
 {
-	u32 bit = BIT(offset);
-	bool enabled = nmk_chip->lowemi & bit;
+	bool enabled = nmk_chip->lowemi & BIT(offset);
 
 	if (lowemi == enabled)
 		return;
 
 	if (lowemi)
-		nmk_chip->lowemi |= bit;
+		nmk_chip->lowemi |= BIT(offset);
 	else
-		nmk_chip->lowemi &= ~bit;
+		nmk_chip->lowemi &= ~BIT(offset);
 
 	writel_relaxed(nmk_chip->lowemi,
 		       nmk_chip->addr + NMK_GPIO_LOWEMI);
@@ -365,22 +362,22 @@
 static void __nmk_gpio_make_input(struct nmk_gpio_chip *nmk_chip,
 				  unsigned offset)
 {
-	writel(1 << offset, nmk_chip->addr + NMK_GPIO_DIRC);
+	writel(BIT(offset), nmk_chip->addr + NMK_GPIO_DIRC);
 }
 
 static void __nmk_gpio_set_output(struct nmk_gpio_chip *nmk_chip,
 				  unsigned offset, int val)
 {
 	if (val)
-		writel(1 << offset, nmk_chip->addr + NMK_GPIO_DATS);
+		writel(BIT(offset), nmk_chip->addr + NMK_GPIO_DATS);
 	else
-		writel(1 << offset, nmk_chip->addr + NMK_GPIO_DATC);
+		writel(BIT(offset), nmk_chip->addr + NMK_GPIO_DATC);
 }
 
 static void __nmk_gpio_make_output(struct nmk_gpio_chip *nmk_chip,
 				  unsigned offset, int val)
 {
-	writel(1 << offset, nmk_chip->addr + NMK_GPIO_DIRS);
+	writel(BIT(offset), nmk_chip->addr + NMK_GPIO_DIRS);
 	__nmk_gpio_set_output(nmk_chip, offset, val);
 }
 
@@ -614,34 +611,7 @@
 	return NMK_GPIO_ALT_C;
 }
 
-int nmk_gpio_get_mode(int gpio)
-{
-	struct nmk_gpio_chip *nmk_chip;
-	u32 afunc, bfunc, bit;
-
-	nmk_chip = nmk_gpio_chips[gpio / NMK_GPIO_PER_CHIP];
-	if (!nmk_chip)
-		return -EINVAL;
-
-	bit = 1 << (gpio % NMK_GPIO_PER_CHIP);
-
-	clk_enable(nmk_chip->clk);
-
-	afunc = readl(nmk_chip->addr + NMK_GPIO_AFSLA) & bit;
-	bfunc = readl(nmk_chip->addr + NMK_GPIO_AFSLB) & bit;
-
-	clk_disable(nmk_chip->clk);
-
-	return (afunc ? NMK_GPIO_ALT_A : 0) | (bfunc ? NMK_GPIO_ALT_B : 0);
-}
-EXPORT_SYMBOL(nmk_gpio_get_mode);
-
-
 /* IRQ functions */
-static inline int nmk_gpio_get_bitmask(int gpio)
-{
-	return 1 << (gpio % NMK_GPIO_PER_CHIP);
-}
 
 static void nmk_gpio_irq_ack(struct irq_data *d)
 {
@@ -649,7 +619,7 @@
 	struct nmk_gpio_chip *nmk_chip = gpiochip_get_data(chip);
 
 	clk_enable(nmk_chip->clk);
-	writel(nmk_gpio_get_bitmask(d->hwirq), nmk_chip->addr + NMK_GPIO_IC);
+	writel(BIT(d->hwirq), nmk_chip->addr + NMK_GPIO_IC);
 	clk_disable(nmk_chip->clk);
 }
 
@@ -659,10 +629,9 @@
 };
 
 static void __nmk_gpio_irq_modify(struct nmk_gpio_chip *nmk_chip,
-				  int gpio, enum nmk_gpio_irq_type which,
+				  int offset, enum nmk_gpio_irq_type which,
 				  bool enable)
 {
-	u32 bitmask = nmk_gpio_get_bitmask(gpio);
 	u32 *rimscval;
 	u32 *fimscval;
 	u32 rimscreg;
@@ -681,24 +650,24 @@
 	}
 
 	/* we must individually set/clear the two edges */
-	if (nmk_chip->edge_rising & bitmask) {
+	if (nmk_chip->edge_rising & BIT(offset)) {
 		if (enable)
-			*rimscval |= bitmask;
+			*rimscval |= BIT(offset);
 		else
-			*rimscval &= ~bitmask;
+			*rimscval &= ~BIT(offset);
 		writel(*rimscval, nmk_chip->addr + rimscreg);
 	}
-	if (nmk_chip->edge_falling & bitmask) {
+	if (nmk_chip->edge_falling & BIT(offset)) {
 		if (enable)
-			*fimscval |= bitmask;
+			*fimscval |= BIT(offset);
 		else
-			*fimscval &= ~bitmask;
+			*fimscval &= ~BIT(offset);
 		writel(*fimscval, nmk_chip->addr + fimscreg);
 	}
 }
 
 static void __nmk_gpio_set_wake(struct nmk_gpio_chip *nmk_chip,
-				int gpio, bool on)
+				int offset, bool on)
 {
 	/*
 	 * Ensure WAKEUP_ENABLE is on.  No need to disable it if wakeup is
@@ -706,21 +675,19 @@
 	 * wakeup is anyhow controlled by the RIMSC and FIMSC registers.
 	 */
 	if (nmk_chip->sleepmode && on) {
-		__nmk_gpio_set_slpm(nmk_chip, gpio % NMK_GPIO_PER_CHIP,
+		__nmk_gpio_set_slpm(nmk_chip, offset,
 				    NMK_GPIO_SLPM_WAKEUP_ENABLE);
 	}
 
-	__nmk_gpio_irq_modify(nmk_chip, gpio, WAKE, on);
+	__nmk_gpio_irq_modify(nmk_chip, offset, WAKE, on);
 }
 
 static int nmk_gpio_irq_maskunmask(struct irq_data *d, bool enable)
 {
 	struct nmk_gpio_chip *nmk_chip;
 	unsigned long flags;
-	u32 bitmask;
 
 	nmk_chip = irq_data_get_irq_chip_data(d);
-	bitmask = nmk_gpio_get_bitmask(d->hwirq);
 	if (!nmk_chip)
 		return -EINVAL;
 
@@ -730,7 +697,7 @@
 
 	__nmk_gpio_irq_modify(nmk_chip, d->hwirq, NORMAL, enable);
 
-	if (!(nmk_chip->real_wake & bitmask))
+	if (!(nmk_chip->real_wake & BIT(d->hwirq)))
 		__nmk_gpio_set_wake(nmk_chip, d->hwirq, enable);
 
 	spin_unlock(&nmk_chip->lock);
@@ -754,12 +721,10 @@
 {
 	struct nmk_gpio_chip *nmk_chip;
 	unsigned long flags;
-	u32 bitmask;
 
 	nmk_chip = irq_data_get_irq_chip_data(d);
 	if (!nmk_chip)
 		return -EINVAL;
-	bitmask = nmk_gpio_get_bitmask(d->hwirq);
 
 	clk_enable(nmk_chip->clk);
 	spin_lock_irqsave(&nmk_gpio_slpm_lock, flags);
@@ -769,9 +734,9 @@
 		__nmk_gpio_set_wake(nmk_chip, d->hwirq, on);
 
 	if (on)
-		nmk_chip->real_wake |= bitmask;
+		nmk_chip->real_wake |= BIT(d->hwirq);
 	else
-		nmk_chip->real_wake &= ~bitmask;
+		nmk_chip->real_wake &= ~BIT(d->hwirq);
 
 	spin_unlock(&nmk_chip->lock);
 	spin_unlock_irqrestore(&nmk_gpio_slpm_lock, flags);
@@ -786,10 +751,8 @@
 	bool wake = irqd_is_wakeup_set(d);
 	struct nmk_gpio_chip *nmk_chip;
 	unsigned long flags;
-	u32 bitmask;
 
 	nmk_chip = irq_data_get_irq_chip_data(d);
-	bitmask = nmk_gpio_get_bitmask(d->hwirq);
 	if (!nmk_chip)
 		return -EINVAL;
 	if (type & IRQ_TYPE_LEVEL_HIGH)
@@ -806,13 +769,13 @@
 	if (enabled || wake)
 		__nmk_gpio_irq_modify(nmk_chip, d->hwirq, WAKE, false);
 
-	nmk_chip->edge_rising &= ~bitmask;
+	nmk_chip->edge_rising &= ~BIT(d->hwirq);
 	if (type & IRQ_TYPE_EDGE_RISING)
-		nmk_chip->edge_rising |= bitmask;
+		nmk_chip->edge_rising |= BIT(d->hwirq);
 
-	nmk_chip->edge_falling &= ~bitmask;
+	nmk_chip->edge_falling &= ~BIT(d->hwirq);
 	if (type & IRQ_TYPE_EDGE_FALLING)
-		nmk_chip->edge_falling |= bitmask;
+		nmk_chip->edge_falling |= BIT(d->hwirq);
 
 	if (enabled)
 		__nmk_gpio_irq_modify(nmk_chip, d->hwirq, NORMAL, true);
@@ -884,13 +847,27 @@
 
 /* I/O Functions */
 
+/* gpiolib ->get_direction(): return 0 if the line is an output, 1 if input. */
+static int nmk_gpio_get_dir(struct gpio_chip *chip, unsigned offset)
+{
+	struct nmk_gpio_chip *nmk_chip = gpiochip_get_data(chip);
+	int dir;
+
+	clk_enable(nmk_chip->clk);
+	/* A set NMK_GPIO_DIR bit marks an output, so invert it for gpiolib. */
+	dir = !(readl(nmk_chip->addr + NMK_GPIO_DIR) & BIT(offset));
+	clk_disable(nmk_chip->clk);
+
+	return dir;
+}
+
 static int nmk_gpio_make_input(struct gpio_chip *chip, unsigned offset)
 {
 	struct nmk_gpio_chip *nmk_chip = gpiochip_get_data(chip);
 
 	clk_enable(nmk_chip->clk);
 
-	writel(1 << offset, nmk_chip->addr + NMK_GPIO_DIRC);
+	writel(BIT(offset), nmk_chip->addr + NMK_GPIO_DIRC);
 
 	clk_disable(nmk_chip->clk);
 
@@ -900,12 +877,11 @@
 static int nmk_gpio_get_input(struct gpio_chip *chip, unsigned offset)
 {
 	struct nmk_gpio_chip *nmk_chip = gpiochip_get_data(chip);
-	u32 bit = 1 << offset;
 	int value;
 
 	clk_enable(nmk_chip->clk);
 
-	value = (readl(nmk_chip->addr + NMK_GPIO_DAT) & bit) != 0;
+	value = !!(readl(nmk_chip->addr + NMK_GPIO_DAT) & BIT(offset));
 
 	clk_disable(nmk_chip->clk);
 
@@ -939,6 +915,19 @@
 }
 
 #ifdef CONFIG_DEBUG_FS
+/* Combine the pin's AFSLA/AFSLB bits into an NMK_GPIO_ALT_* mode value. */
+static int nmk_gpio_get_mode(struct nmk_gpio_chip *nmk_chip, int offset)
+{
+	int mode = 0;
+
+	clk_enable(nmk_chip->clk);
+	if (readl(nmk_chip->addr + NMK_GPIO_AFSLA) & BIT(offset))
+		mode |= NMK_GPIO_ALT_A;
+	if (readl(nmk_chip->addr + NMK_GPIO_AFSLB) & BIT(offset))
+		mode |= NMK_GPIO_ALT_B;
+	clk_disable(nmk_chip->clk);
+	return mode;
+}
 
 #include <linux/seq_file.h>
 
@@ -952,7 +941,6 @@
 	bool is_out;
 	bool data_out;
 	bool pull;
-	u32 bit = 1 << offset;
 	const char *modes[] = {
 		[NMK_GPIO_ALT_GPIO]	= "gpio",
 		[NMK_GPIO_ALT_A]	= "altA",
@@ -970,10 +958,10 @@
 	};
 
 	clk_enable(nmk_chip->clk);
-	is_out = !!(readl(nmk_chip->addr + NMK_GPIO_DIR) & bit);
-	pull = !(readl(nmk_chip->addr + NMK_GPIO_PDIS) & bit);
-	data_out = !!(readl(nmk_chip->addr + NMK_GPIO_DAT) & bit);
-	mode = nmk_gpio_get_mode(gpio);
+	is_out = !!(readl(nmk_chip->addr + NMK_GPIO_DIR) & BIT(offset));
+	pull = !(readl(nmk_chip->addr + NMK_GPIO_PDIS) & BIT(offset));
+	data_out = !!(readl(nmk_chip->addr + NMK_GPIO_DAT) & BIT(offset));
+	mode = nmk_gpio_get_mode(nmk_chip, offset);
 	if ((mode == NMK_GPIO_ALT_C) && pctldev)
 		mode = nmk_prcm_gpiocr_get_mode(pctldev, gpio);
 
@@ -1007,11 +995,10 @@
 		 */
 		if (irq > 0 && desc && desc->action) {
 			char *trigger;
-			u32 bitmask = nmk_gpio_get_bitmask(gpio);
 
-			if (nmk_chip->edge_rising & bitmask)
+			if (nmk_chip->edge_rising & BIT(offset))
 				trigger = "edge-rising";
-			else if (nmk_chip->edge_falling & bitmask)
+			else if (nmk_chip->edge_falling & BIT(offset))
 				trigger = "edge-falling";
 			else
 				trigger = "edge-undefined";
@@ -1246,6 +1233,7 @@
 	chip = &nmk_chip->chip;
 	chip->request = gpiochip_generic_request;
 	chip->free = gpiochip_generic_free;
+	chip->get_direction = nmk_gpio_get_dir;
 	chip->direction_input = nmk_gpio_make_input;
 	chip->get = nmk_gpio_get_input;
 	chip->direction_output = nmk_gpio_make_output;
@@ -1612,7 +1600,7 @@
 		ret = nmk_pinctrl_dt_subnode_to_map(pctldev, np, map,
 				&reserved_maps, num_maps);
 		if (ret < 0) {
-			pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+			pinctrl_utils_free_map(pctldev, *map, *num_maps);
 			return ret;
 		}
 	}
@@ -1626,7 +1614,7 @@
 	.get_group_pins = nmk_get_group_pins,
 	.pin_dbg_show = nmk_pin_dbg_show,
 	.dt_node_to_map = nmk_pinctrl_dt_node_to_map,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int nmk_pmx_get_funcs_cnt(struct pinctrl_dev *pctldev)
@@ -2044,7 +2032,7 @@
 	nmk_pinctrl_desc.npins = npct->soc->npins;
 	npct->dev = &pdev->dev;
 
-	npct->pctl = pinctrl_register(&nmk_pinctrl_desc, &pdev->dev, npct);
+	npct->pctl = devm_pinctrl_register(&pdev->dev, &nmk_pinctrl_desc, npct);
 	if (IS_ERR(npct->pctl)) {
 		dev_err(&pdev->dev, "could not register Nomadik pinctrl driver\n");
 		return PTR_ERR(npct->pctl);
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 79e6159..d5bf9fa 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -386,7 +386,7 @@
 	return 0;
 
 exit:
-	pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+	pinctrl_utils_free_map(pctldev, *map, *num_maps);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pinconf_generic_dt_node_to_map);
diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c
index ecb57635..54569a7 100644
--- a/drivers/pinctrl/pinctrl-adi2.c
+++ b/drivers/pinctrl/pinctrl-adi2.c
@@ -1058,7 +1058,8 @@
 	adi_pinmux_desc.npins = pinctrl->soc->npins;
 
 	/* Now register the pin controller and all pins it handles */
-	pinctrl->pctl = pinctrl_register(&adi_pinmux_desc, &pdev->dev, pinctrl);
+	pinctrl->pctl = devm_pinctrl_register(&pdev->dev, &adi_pinmux_desc,
+					      pinctrl);
 	if (IS_ERR(pinctrl->pctl)) {
 		dev_err(&pdev->dev, "could not register pinctrl ADI2 driver\n");
 		return PTR_ERR(pinctrl->pctl);
@@ -1069,18 +1070,8 @@
 	return 0;
 }
 
-static int adi_pinctrl_remove(struct platform_device *pdev)
-{
-	struct adi_pinctrl *pinctrl = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pinctrl->pctl);
-
-	return 0;
-}
-
 static struct platform_driver adi_pinctrl_driver = {
 	.probe		= adi_pinctrl_probe,
-	.remove		= adi_pinctrl_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 	},
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 5c025f5..634b4d3 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -580,7 +580,7 @@
 	.get_group_pins		= amd_get_group_pins,
 #ifdef CONFIG_OF
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_group,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 #endif
 };
 
@@ -783,8 +783,8 @@
 	gpio_dev->ngroups = ARRAY_SIZE(kerncz_groups);
 
 	amd_pinctrl_desc.name = dev_name(&pdev->dev);
-	gpio_dev->pctrl = pinctrl_register(&amd_pinctrl_desc,
-					&pdev->dev, gpio_dev);
+	gpio_dev->pctrl = devm_pinctrl_register(&pdev->dev, &amd_pinctrl_desc,
+						gpio_dev);
 	if (IS_ERR(gpio_dev->pctrl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(gpio_dev->pctrl);
@@ -792,7 +792,7 @@
 
 	ret = gpiochip_add_data(&gpio_dev->gc, gpio_dev);
 	if (ret)
-		goto out1;
+		return ret;
 
 	ret = gpiochip_add_pin_range(&gpio_dev->gc, dev_name(&pdev->dev),
 				0, 0, TOTAL_NUMBER_OF_PINS);
@@ -825,8 +825,6 @@
 out2:
 	gpiochip_remove(&gpio_dev->gc);
 
-out1:
-	pinctrl_unregister(gpio_dev->pctrl);
 	return ret;
 }
 
@@ -837,13 +835,13 @@
 	gpio_dev = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&gpio_dev->gc);
-	pinctrl_unregister(gpio_dev->pctrl);
 
 	return 0;
 }
 
 static const struct acpi_device_id amd_gpio_acpi_match[] = {
 	{ "AMD0030", 0 },
+	{ "AMDI0030", 0},
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, amd_gpio_acpi_match);
diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c
index e844fdc..4e9fe78 100644
--- a/drivers/pinctrl/pinctrl-as3722.c
+++ b/drivers/pinctrl/pinctrl-as3722.c
@@ -201,7 +201,7 @@
 	.get_group_name = as3722_pinctrl_get_group_name,
 	.get_group_pins = as3722_pinctrl_get_group_pins,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int as3722_pinctrl_get_funcs_count(struct pinctrl_dev *pctldev)
@@ -569,8 +569,8 @@
 	as3722_pinctrl_desc.name = dev_name(&pdev->dev);
 	as3722_pinctrl_desc.pins = as3722_pins_desc;
 	as3722_pinctrl_desc.npins = ARRAY_SIZE(as3722_pins_desc);
-	as_pci->pctl = pinctrl_register(&as3722_pinctrl_desc,
-					&pdev->dev, as_pci);
+	as_pci->pctl = devm_pinctrl_register(&pdev->dev, &as3722_pinctrl_desc,
+					     as_pci);
 	if (IS_ERR(as_pci->pctl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(as_pci->pctl);
@@ -582,7 +582,7 @@
 	ret = gpiochip_add_data(&as_pci->gpio_chip, as_pci);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Couldn't register gpiochip, %d\n", ret);
-		goto fail_chip_add;
+		return ret;
 	}
 
 	ret = gpiochip_add_pin_range(&as_pci->gpio_chip, dev_name(&pdev->dev),
@@ -596,8 +596,6 @@
 
 fail_range_add:
 	gpiochip_remove(&as_pci->gpio_chip);
-fail_chip_add:
-	pinctrl_unregister(as_pci->pctl);
 	return ret;
 }
 
@@ -606,7 +604,6 @@
 	struct as3722_pctrl_info *as_pci = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&as_pci->gpio_chip);
-	pinctrl_unregister(as_pci->pctl);
 	return 0;
 }
 
diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 2c447130..a025b40 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -579,7 +579,7 @@
 	}
 
 	if (ret < 0) {
-		pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+		pinctrl_utils_free_map(pctldev, *map, *num_maps);
 		dev_err(pctldev->dev, "can't create maps for node %s\n",
 			np_config->full_name);
 	}
@@ -592,7 +592,7 @@
 	.get_group_name		= atmel_pctl_get_group_name,
 	.get_group_pins		= atmel_pctl_get_group_pins,
 	.dt_node_to_map		= atmel_pctl_dt_node_to_map,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static int atmel_pmx_get_functions_count(struct pinctrl_dev *pctldev)
@@ -1036,18 +1036,19 @@
 		goto clk_prepare_enable_error;
 	}
 
-	atmel_pioctrl->pinctrl_dev = pinctrl_register(&atmel_pinctrl_desc,
-						      &pdev->dev,
-						      atmel_pioctrl);
-	if (!atmel_pioctrl->pinctrl_dev) {
+	atmel_pioctrl->pinctrl_dev = devm_pinctrl_register(&pdev->dev,
+							   &atmel_pinctrl_desc,
+							   atmel_pioctrl);
+	if (IS_ERR(atmel_pioctrl->pinctrl_dev)) {
+		ret = PTR_ERR(atmel_pioctrl->pinctrl_dev);
 		dev_err(dev, "pinctrl registration failed\n");
-		goto pinctrl_register_error;
+		goto clk_unprep;
 	}
 
 	ret = gpiochip_add_data(atmel_pioctrl->gpio_chip, atmel_pioctrl);
 	if (ret) {
 		dev_err(dev, "failed to add gpiochip\n");
-		goto gpiochip_add_error;
+		goto clk_unprep;
 	}
 
 	ret = gpiochip_add_pin_range(atmel_pioctrl->gpio_chip, dev_name(dev),
@@ -1061,15 +1062,15 @@
 
 	return 0;
 
-clk_prepare_enable_error:
-	irq_domain_remove(atmel_pioctrl->irq_domain);
-pinctrl_register_error:
-	clk_disable_unprepare(atmel_pioctrl->clk);
-gpiochip_add_error:
-	pinctrl_unregister(atmel_pioctrl->pinctrl_dev);
 gpiochip_add_pin_range_error:
 	gpiochip_remove(atmel_pioctrl->gpio_chip);
 
+clk_unprep:
+	clk_disable_unprepare(atmel_pioctrl->clk);
+
+clk_prepare_enable_error:
+	irq_domain_remove(atmel_pioctrl->irq_domain);
+
 	return ret;
 }
 
@@ -1079,7 +1080,6 @@
 
 	irq_domain_remove(atmel_pioctrl->irq_domain);
 	clk_disable_unprepare(atmel_pioctrl->clk);
-	pinctrl_unregister(atmel_pioctrl->pinctrl_dev);
 	gpiochip_remove(atmel_pioctrl->gpio_chip);
 
 	return 0;
diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
index 523b6b7..b7c0d6f 100644
--- a/drivers/pinctrl/pinctrl-at91.c
+++ b/drivers/pinctrl/pinctrl-at91.c
@@ -1252,7 +1252,8 @@
 	}
 
 	platform_set_drvdata(pdev, info);
-	info->pctl = pinctrl_register(&at91_pinctrl_desc, &pdev->dev, info);
+	info->pctl = devm_pinctrl_register(&pdev->dev, &at91_pinctrl_desc,
+					   info);
 
 	if (IS_ERR(info->pctl)) {
 		dev_err(&pdev->dev, "could not register AT91 pinctrl driver\n");
@@ -1269,15 +1270,6 @@
 	return 0;
 }
 
-static int at91_pinctrl_remove(struct platform_device *pdev)
-{
-	struct at91_pinctrl *info = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(info->pctl);
-
-	return 0;
-}
-
 static int at91_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 {
 	struct at91_gpio_chip *at91_gpio = gpiochip_get_data(chip);
@@ -1660,7 +1652,7 @@
 }
 
 /* This structure is replicated for each GPIO block allocated at probe time */
-static struct gpio_chip at91_gpio_template = {
+static const struct gpio_chip at91_gpio_template = {
 	.request		= gpiochip_generic_request,
 	.free			= gpiochip_generic_free,
 	.get_direction		= at91_gpio_get_direction,
@@ -1730,14 +1722,9 @@
 		goto err;
 	}
 
-	ret = clk_prepare(at91_chip->clock);
-	if (ret)
-		goto clk_prepare_err;
-
-	/* enable PIO controller's clock */
-	ret = clk_enable(at91_chip->clock);
+	ret = clk_prepare_enable(at91_chip->clock);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to enable clock, ignoring.\n");
+		dev_err(&pdev->dev, "failed to prepare and enable clock, ignoring.\n");
 		goto clk_enable_err;
 	}
 
@@ -1797,10 +1784,8 @@
 irq_setup_err:
 	gpiochip_remove(chip);
 gpiochip_add_err:
-	clk_disable(at91_chip->clock);
 clk_enable_err:
-	clk_unprepare(at91_chip->clock);
-clk_prepare_err:
+	clk_disable_unprepare(at91_chip->clock);
 err:
 	dev_err(&pdev->dev, "Failure %i for GPIO %i\n", ret, alias_idx);
 
@@ -1821,7 +1806,6 @@
 		.of_match_table = at91_pinctrl_of_match,
 	},
 	.probe = at91_pinctrl_probe,
-	.remove = at91_pinctrl_remove,
 };
 
 static struct platform_driver * const drivers[] = {
diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c
index f1343d6..30ee564 100644
--- a/drivers/pinctrl/pinctrl-digicolor.c
+++ b/drivers/pinctrl/pinctrl-digicolor.c
@@ -84,7 +84,7 @@
 	.get_group_name		= dc_get_group_name,
 	.get_group_pins		= dc_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static const char *const dc_functions[] = {
@@ -280,7 +280,7 @@
 	struct pinctrl_desc *pctl_desc;
 	char *pin_names;
 	int name_len = strlen("GP_xx") + 1;
-	int i, j, ret;
+	int i, j;
 
 	pmap = devm_kzalloc(&pdev->dev, sizeof(*pmap), GFP_KERNEL);
 	if (!pmap)
@@ -326,26 +326,19 @@
 
 	pmap->dev = &pdev->dev;
 
-	pmap->pctl = pinctrl_register(pctl_desc, &pdev->dev, pmap);
+	pmap->pctl = devm_pinctrl_register(&pdev->dev, pctl_desc, pmap);
 	if (IS_ERR(pmap->pctl)) {
 		dev_err(&pdev->dev, "pinctrl driver registration failed\n");
 		return PTR_ERR(pmap->pctl);
 	}
 
-	ret = dc_gpiochip_add(pmap, pdev->dev.of_node);
-	if (ret < 0) {
-		pinctrl_unregister(pmap->pctl);
-		return ret;
-	}
-
-	return 0;
+	return dc_gpiochip_add(pmap, pdev->dev.of_node);
 }
 
 static int dc_pinctrl_remove(struct platform_device *pdev)
 {
 	struct dc_pinmap *pmap = platform_get_drvdata(pdev);
 
-	pinctrl_unregister(pmap->pctl);
 	gpiochip_remove(&pmap->chip);
 
 	return 0;
diff --git a/drivers/pinctrl/pinctrl-lantiq.c b/drivers/pinctrl/pinctrl-lantiq.c
index fc38a85..a4d6474 100644
--- a/drivers/pinctrl/pinctrl-lantiq.c
+++ b/drivers/pinctrl/pinctrl-lantiq.c
@@ -336,7 +336,7 @@
 	desc->pmxops = &ltq_pmx_ops;
 	info->dev = &pdev->dev;
 
-	info->pctrl = pinctrl_register(desc, &pdev->dev, info);
+	info->pctrl = devm_pinctrl_register(&pdev->dev, desc, info);
 	if (IS_ERR(info->pctrl)) {
 		dev_err(&pdev->dev, "failed to register LTQ pinmux driver\n");
 		return PTR_ERR(info->pctrl);
diff --git a/drivers/pinctrl/pinctrl-lpc18xx.c b/drivers/pinctrl/pinctrl-lpc18xx.c
index b1767f7..8a931c7 100644
--- a/drivers/pinctrl/pinctrl-lpc18xx.c
+++ b/drivers/pinctrl/pinctrl-lpc18xx.c
@@ -1252,7 +1252,7 @@
 	.get_group_name		= lpc18xx_pctl_get_group_name,
 	.get_group_pins		= lpc18xx_pctl_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static struct pinctrl_desc lpc18xx_scu_desc = {
@@ -1355,7 +1355,7 @@
 
 	platform_set_drvdata(pdev, scu);
 
-	scu->pctl = pinctrl_register(&lpc18xx_scu_desc, &pdev->dev, scu);
+	scu->pctl = devm_pinctrl_register(&pdev->dev, &lpc18xx_scu_desc, scu);
 	if (IS_ERR(scu->pctl)) {
 		dev_err(&pdev->dev, "Could not register pinctrl driver\n");
 		clk_disable_unprepare(scu->clk);
@@ -1369,7 +1369,6 @@
 {
 	struct lpc18xx_scu_data *scu = platform_get_drvdata(pdev);
 
-	pinctrl_unregister(scu->pctl);
 	clk_disable_unprepare(scu->clk);
 
 	return 0;
diff --git a/drivers/pinctrl/pinctrl-palmas.c b/drivers/pinctrl/pinctrl-palmas.c
index f7e1680..8edb3f8c 100644
--- a/drivers/pinctrl/pinctrl-palmas.c
+++ b/drivers/pinctrl/pinctrl-palmas.c
@@ -656,7 +656,7 @@
 	.get_group_name = palmas_pinctrl_get_group_name,
 	.get_group_pins = palmas_pinctrl_get_group_pins,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int palmas_pinctrl_get_funcs_count(struct pinctrl_dev *pctldev)
@@ -1043,7 +1043,8 @@
 	palmas_pinctrl_desc.name = dev_name(&pdev->dev);
 	palmas_pinctrl_desc.pins = palmas_pins_desc;
 	palmas_pinctrl_desc.npins = ARRAY_SIZE(palmas_pins_desc);
-	pci->pctl = pinctrl_register(&palmas_pinctrl_desc, &pdev->dev, pci);
+	pci->pctl = devm_pinctrl_register(&pdev->dev, &palmas_pinctrl_desc,
+					  pci);
 	if (IS_ERR(pci->pctl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(pci->pctl);
@@ -1051,21 +1052,12 @@
 	return 0;
 }
 
-static int palmas_pinctrl_remove(struct platform_device *pdev)
-{
-	struct palmas_pctrl_chip_info *pci = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pci->pctl);
-	return 0;
-}
-
 static struct platform_driver palmas_pinctrl_driver = {
 	.driver = {
 		.name = "palmas-pinctrl",
 		.of_match_table = palmas_pinctrl_of_match,
 	},
 	.probe = palmas_pinctrl_probe,
-	.remove = palmas_pinctrl_remove,
 };
 
 module_platform_driver(palmas_pinctrl_driver);
diff --git a/drivers/pinctrl/pinctrl-pic32.c b/drivers/pinctrl/pinctrl-pic32.c
index 0b07d4b..31ceb95 100644
--- a/drivers/pinctrl/pinctrl-pic32.c
+++ b/drivers/pinctrl/pinctrl-pic32.c
@@ -1743,7 +1743,7 @@
 	.get_group_name = pic32_pinctrl_get_group_name,
 	.get_group_pins = pic32_pinctrl_get_group_pins,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int pic32_pinmux_get_functions_count(struct pinctrl_dev *pctldev)
@@ -2194,7 +2194,8 @@
 	pic32_pinctrl_desc.custom_params = pic32_mpp_bindings;
 	pic32_pinctrl_desc.num_custom_params = ARRAY_SIZE(pic32_mpp_bindings);
 
-	pctl->pctldev = pinctrl_register(&pic32_pinctrl_desc, &pdev->dev, pctl);
+	pctl->pctldev = devm_pinctrl_register(&pdev->dev, &pic32_pinctrl_desc,
+					      pctl);
 	if (IS_ERR(pctl->pctldev)) {
 		dev_err(&pdev->dev, "Failed to register pinctrl device\n");
 		return PTR_ERR(pctl->pctldev);
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index 2673cd9..c6d410e 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -913,7 +913,7 @@
 	.get_group_name = pistachio_pinctrl_get_group_name,
 	.get_group_pins = pistachio_pinctrl_get_group_pins,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int pistachio_pinmux_get_functions_count(struct pinctrl_dev *pctldev)
@@ -1457,8 +1457,8 @@
 	pistachio_pinctrl_desc.pins = pctl->pins;
 	pistachio_pinctrl_desc.npins = pctl->npins;
 
-	pctl->pctldev = pinctrl_register(&pistachio_pinctrl_desc, &pdev->dev,
-					 pctl);
+	pctl->pctldev = devm_pinctrl_register(&pdev->dev, &pistachio_pinctrl_desc,
+					      pctl);
 	if (IS_ERR(pctl->pctldev)) {
 		dev_err(&pdev->dev, "Failed to register pinctrl device\n");
 		return PTR_ERR(pctl->pctldev);
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index bf032b9..a91026e 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -99,6 +99,15 @@
 };
 
 /**
+ * enum rockchip_pin_pull_type - row index into the rockchip_pull_list array.
+ */
+enum rockchip_pin_pull_type {
+	PULL_TYPE_IO_DEFAULT = 0,	/* selects row 0 of rockchip_pull_list */
+	PULL_TYPE_IO_1V8_ONLY,		/* selects row 1 of rockchip_pull_list */
+	PULL_TYPE_MAX			/* row count of rockchip_pull_list */
+};
+
+/**
  * @drv_type: drive strength variant using rockchip_perpin_drv_type
  * @offset: if initialized to -1 it will be autocalculated, by specifying
  *	    an initial offset value the relevant source offset can be reset
@@ -123,6 +132,7 @@
  * @bank_num: number of the bank, to account for holes
  * @iomux: array describing the 4 iomux sources of the bank
  * @drv: array describing the 4 drive strength sources of the bank
+ * @pull_type: array describing the 4 pull type sources of the bank
  * @valid: are all necessary informations present
  * @of_node: dt node of this bank
  * @drvdata: common pinctrl basedata
@@ -143,6 +153,7 @@
 	u8				bank_num;
 	struct rockchip_iomux		iomux[4];
 	struct rockchip_drv		drv[4];
+	enum rockchip_pin_pull_type	pull_type[4];
 	bool				valid;
 	struct device_node		*of_node;
 	struct rockchip_pinctrl		*drvdata;
@@ -198,6 +209,31 @@
 		},							\
 	}
 
+#define PIN_BANK_DRV_FLAGS_PULL_FLAGS(id, pins, label, drv0, drv1,	\
+				      drv2, drv3, pull0, pull1,		\
+				      pull2, pull3)			\
+	{ /* initializer for one struct rockchip_pin_bank entry */	\
+		.bank_num	= id,					\
+		.nr_pins	= pins,					\
+		.name		= label,				\
+		.iomux		= {					\
+			{ .offset = -1 },				\
+			{ .offset = -1 },				\
+			{ .offset = -1 },				\
+			{ .offset = -1 },				\
+		},							\
+		.drv		= {					\
+			{ .drv_type = drv0, .offset = -1 },		\
+			{ .drv_type = drv1, .offset = -1 },		\
+			{ .drv_type = drv2, .offset = -1 },		\
+			{ .drv_type = drv3, .offset = -1 },		\
+		},							\
+		.pull_type[0] = pull0,	/* looked up as [pin_num / 8] */	\
+		.pull_type[1] = pull1,					\
+		.pull_type[2] = pull2,					\
+		.pull_type[3] = pull3,					\
+	}
+
 #define PIN_BANK_IOMUX_DRV_FLAGS_OFFSET(id, pins, label, iom0, iom1,	\
 					iom2, iom3, drv0, drv1, drv2,	\
 					drv3, offset0, offset1,		\
@@ -220,6 +256,34 @@
 		},							\
 	}
 
+#define PIN_BANK_IOMUX_FLAGS_DRV_FLAGS_OFFSET_PULL_FLAGS(id, pins,	\
+					      label, iom0, iom1, iom2,  \
+					      iom3, drv0, drv1, drv2,   \
+					      drv3, offset0, offset1,   \
+					      offset2, offset3, pull0,  \
+					      pull1, pull2, pull3)	\
+	{ /* initializer for one struct rockchip_pin_bank entry */	\
+		.bank_num	= id,					\
+		.nr_pins	= pins,					\
+		.name		= label,				\
+		.iomux		= {					\
+			{ .type = iom0, .offset = -1 },			\
+			{ .type = iom1, .offset = -1 },			\
+			{ .type = iom2, .offset = -1 },			\
+			{ .type = iom3, .offset = -1 },			\
+		},							\
+		.drv		= { /* offsetN applies to drv[N] only */	\
+			{ .drv_type = drv0, .offset = offset0 },	\
+			{ .drv_type = drv1, .offset = offset1 },	\
+			{ .drv_type = drv2, .offset = offset2 },	\
+			{ .drv_type = drv3, .offset = offset3 },	\
+		},							\
+		.pull_type[0] = pull0,	/* looked up as [pin_num / 8] */	\
+		.pull_type[1] = pull1,					\
+		.pull_type[2] = pull2,					\
+		.pull_type[3] = pull3,					\
+	}
+
 /**
  */
 struct rockchip_pin_ctrl {
@@ -1020,12 +1084,27 @@
 	return ret;
 }
 
+static int rockchip_pull_list[PULL_TYPE_MAX][4] = {
+	{	/* row PULL_TYPE_IO_DEFAULT; column = pull field value in the register */
+		PIN_CONFIG_BIAS_DISABLE,
+		PIN_CONFIG_BIAS_PULL_UP,
+		PIN_CONFIG_BIAS_PULL_DOWN,
+		PIN_CONFIG_BIAS_BUS_HOLD
+	},
+	{	/* row PULL_TYPE_IO_1V8_ONLY; same column meaning */
+		PIN_CONFIG_BIAS_DISABLE,
+		PIN_CONFIG_BIAS_PULL_DOWN,
+		PIN_CONFIG_BIAS_DISABLE,	/* also column 0, which a by-value search hits first */
+		PIN_CONFIG_BIAS_PULL_UP
+	},
+};
+
 static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
 {
 	struct rockchip_pinctrl *info = bank->drvdata;
 	struct rockchip_pin_ctrl *ctrl = info->ctrl;
 	struct regmap *regmap;
-	int reg, ret;
+	int reg, ret, pull_type;
 	u8 bit;
 	u32 data;
 
@@ -1048,22 +1127,11 @@
 	case RK3288:
 	case RK3368:
 	case RK3399:
+		pull_type = bank->pull_type[pin_num / 8];
 		data >>= bit;
 		data &= (1 << RK3188_PULL_BITS_PER_PIN) - 1;
 
-		switch (data) {
-		case 0:
-			return PIN_CONFIG_BIAS_DISABLE;
-		case 1:
-			return PIN_CONFIG_BIAS_PULL_UP;
-		case 2:
-			return PIN_CONFIG_BIAS_PULL_DOWN;
-		case 3:
-			return PIN_CONFIG_BIAS_BUS_HOLD;
-		}
-
-		dev_err(info->dev, "unknown pull setting\n");
-		return -EIO;
+		return rockchip_pull_list[pull_type][data];
 	default:
 		dev_err(info->dev, "unsupported pinctrl type\n");
 		return -EINVAL;
@@ -1076,7 +1144,7 @@
 	struct rockchip_pinctrl *info = bank->drvdata;
 	struct rockchip_pin_ctrl *ctrl = info->ctrl;
 	struct regmap *regmap;
-	int reg, ret;
+	int reg, ret, i, pull_type;
 	unsigned long flags;
 	u8 bit;
 	u32 data, rmask;
@@ -1105,30 +1173,28 @@
 	case RK3288:
 	case RK3368:
 	case RK3399:
+		pull_type = bank->pull_type[pin_num / 8];
+		ret = -EINVAL;
+		for (i = 0; i < ARRAY_SIZE(rockchip_pull_list[pull_type]);
+			i++) {
+			if (rockchip_pull_list[pull_type][i] == pull) {
+				ret = i;
+				break;
+			}
+		}
+
+		if (ret < 0) {
+			dev_err(info->dev, "unsupported pull setting %d\n",
+				pull);
+			return ret;
+		}
+
 		spin_lock_irqsave(&bank->slock, flags);
 
 		/* enable the write to the equivalent lower bits */
 		data = ((1 << RK3188_PULL_BITS_PER_PIN) - 1) << (bit + 16);
 		rmask = data | (data >> 16);
-
-		switch (pull) {
-		case PIN_CONFIG_BIAS_DISABLE:
-			break;
-		case PIN_CONFIG_BIAS_PULL_UP:
-			data |= (1 << bit);
-			break;
-		case PIN_CONFIG_BIAS_PULL_DOWN:
-			data |= (2 << bit);
-			break;
-		case PIN_CONFIG_BIAS_BUS_HOLD:
-			data |= (3 << bit);
-			break;
-		default:
-			spin_unlock_irqrestore(&bank->slock, flags);
-			dev_err(info->dev, "unsupported pull setting %d\n",
-				pull);
-			return -EINVAL;
-		}
+		data |= (ret << bit);
 
 		ret = regmap_update_bits(regmap, reg, rmask, data);
 
@@ -1208,6 +1274,16 @@
 	return 0;
 }
 
+/* gpiolib get_direction hook: 1 when the pin's GPIO_SWPORT_DDR bit is clear. */
+static int rockchip_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+{
+	struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
+	u32 ddr = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
+
+	/* Only this pin's bit matters; a clear bit yields 1, a set bit 0. */
+	return (ddr & BIT(offset)) ? 0 : 1;
+}
+
 /*
  * The calls to gpio_direction_output() and gpio_direction_input()
  * leads to this function call (via the pinctrl_gpio_direction_{input|output}()
@@ -1636,7 +1712,7 @@
 	if (ret)
 		return ret;
 
-	info->pctl_dev = pinctrl_register(ctrldesc, &pdev->dev, info);
+	info->pctl_dev = devm_pinctrl_register(&pdev->dev, ctrldesc, info);
 	if (IS_ERR(info->pctl_dev)) {
 		dev_err(&pdev->dev, "could not register pinctrl driver\n");
 		return PTR_ERR(info->pctl_dev);
@@ -1741,6 +1817,7 @@
 	.free = gpiochip_generic_free,
 	.set = rockchip_gpio_set,
 	.get = rockchip_gpio_get,
+	.get_direction	= rockchip_gpio_get_direction,
 	.direction_input = rockchip_gpio_direction_input,
 	.direction_output = rockchip_gpio_direction_output,
 	.to_irq = rockchip_gpio_to_irq,
@@ -2541,19 +2618,24 @@
 };
 
 static struct rockchip_pin_bank rk3399_pin_banks[] = {
-	PIN_BANK_IOMUX_DRV_FLAGS_OFFSET(0, 32, "gpio0", IOMUX_SOURCE_PMU,
-					IOMUX_SOURCE_PMU,
-					IOMUX_SOURCE_PMU,
-					IOMUX_SOURCE_PMU,
-					DRV_TYPE_IO_1V8_ONLY,
-					DRV_TYPE_IO_1V8_ONLY,
-					DRV_TYPE_IO_DEFAULT,
-					DRV_TYPE_IO_DEFAULT,
-					0x0,
-					0x8,
-					-1,
-					-1
-					),
+	PIN_BANK_IOMUX_FLAGS_DRV_FLAGS_OFFSET_PULL_FLAGS(0, 32, "gpio0",
+							 IOMUX_SOURCE_PMU,
+							 IOMUX_SOURCE_PMU,
+							 IOMUX_SOURCE_PMU,
+							 IOMUX_SOURCE_PMU,
+							 DRV_TYPE_IO_1V8_ONLY,
+							 DRV_TYPE_IO_1V8_ONLY,
+							 DRV_TYPE_IO_DEFAULT,
+							 DRV_TYPE_IO_DEFAULT,
+							 0x0,
+							 0x8,
+							 -1,
+							 -1,
+							 PULL_TYPE_IO_1V8_ONLY,
+							 PULL_TYPE_IO_1V8_ONLY,
+							 PULL_TYPE_IO_DEFAULT,
+							 PULL_TYPE_IO_DEFAULT
+							),
 	PIN_BANK_IOMUX_DRV_FLAGS_OFFSET(1, 32, "gpio1", IOMUX_SOURCE_PMU,
 					IOMUX_SOURCE_PMU,
 					IOMUX_SOURCE_PMU,
@@ -2567,11 +2649,15 @@
 					0x30,
 					0x38
 					),
-	PIN_BANK_DRV_FLAGS(2, 32, "gpio2", DRV_TYPE_IO_1V8_OR_3V0,
-			   DRV_TYPE_IO_1V8_OR_3V0,
-			   DRV_TYPE_IO_1V8_ONLY,
-			   DRV_TYPE_IO_1V8_ONLY
-			   ),
+	PIN_BANK_DRV_FLAGS_PULL_FLAGS(2, 32, "gpio2", DRV_TYPE_IO_1V8_OR_3V0,
+				      DRV_TYPE_IO_1V8_OR_3V0,
+				      DRV_TYPE_IO_1V8_ONLY,
+				      DRV_TYPE_IO_1V8_ONLY,
+				      PULL_TYPE_IO_DEFAULT,
+				      PULL_TYPE_IO_DEFAULT,
+				      PULL_TYPE_IO_1V8_ONLY,
+				      PULL_TYPE_IO_1V8_ONLY
+				      ),
 	PIN_BANK_DRV_FLAGS(3, 32, "gpio3", DRV_TYPE_IO_3V3_ONLY,
 			   DRV_TYPE_IO_3V3_ONLY,
 			   DRV_TYPE_IO_3V3_ONLY,
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index cab66c6..d0ba968 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -1724,7 +1724,7 @@
 	pctl_desc->confops	= &st_confops;
 	pctl_desc->name		= dev_name(&pdev->dev);
 
-	info->pctl = pinctrl_register(pctl_desc, &pdev->dev, info);
+	info->pctl = devm_pinctrl_register(&pdev->dev, pctl_desc, info);
 	if (IS_ERR(info->pctl)) {
 		dev_err(&pdev->dev, "Failed pinctrl registration\n");
 		return PTR_ERR(info->pctl);
diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c
index 6546b9b..edfba50 100644
--- a/drivers/pinctrl/pinctrl-tb10x.c
+++ b/drivers/pinctrl/pinctrl-tb10x.c
@@ -582,7 +582,7 @@
 	.get_group_name   = tb10x_get_group_name,
 	.get_group_pins   = tb10x_get_group_pins,
 	.dt_node_to_map   = tb10x_dt_node_to_map,
-	.dt_free_map      = pinctrl_utils_dt_free_map,
+	.dt_free_map      = pinctrl_utils_free_map,
 };
 
 static int tb10x_get_functions_count(struct pinctrl_dev *pctl)
@@ -806,7 +806,7 @@
 		}
 	}
 
-	state->pctl = pinctrl_register(&tb10x_pindesc, dev, state);
+	state->pctl = devm_pinctrl_register(dev, &tb10x_pindesc, state);
 	if (IS_ERR(state->pctl)) {
 		dev_err(dev, "could not register TB10x pin driver\n");
 		ret = PTR_ERR(state->pctl);
@@ -824,7 +824,6 @@
 {
 	struct tb10x_pinctrl *state = platform_get_drvdata(pdev);
 
-	pinctrl_unregister(state->pctl);
 	mutex_destroy(&state->mutex);
 
 	return 0;
diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c
index b89ad3c..e70e362 100644
--- a/drivers/pinctrl/pinctrl-tz1090-pdc.c
+++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c
@@ -947,7 +947,8 @@
 	if (IS_ERR(pmx->regs))
 		return PTR_ERR(pmx->regs);
 
-	pmx->pctl = pinctrl_register(&tz1090_pdc_pinctrl_desc, &pdev->dev, pmx);
+	pmx->pctl = devm_pinctrl_register(&pdev->dev, &tz1090_pdc_pinctrl_desc,
+					  pmx);
 	if (IS_ERR(pmx->pctl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(pmx->pctl);
@@ -960,15 +961,6 @@
 	return 0;
 }
 
-static int tz1090_pdc_pinctrl_remove(struct platform_device *pdev)
-{
-	struct tz1090_pdc_pmx *pmx = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pmx->pctl);
-
-	return 0;
-}
-
 static const struct of_device_id tz1090_pdc_pinctrl_of_match[] = {
 	{ .compatible = "img,tz1090-pdc-pinctrl", },
 	{ },
@@ -980,7 +972,6 @@
 		.of_match_table	= tz1090_pdc_pinctrl_of_match,
 	},
 	.probe	= tz1090_pdc_pinctrl_probe,
-	.remove	= tz1090_pdc_pinctrl_remove,
 };
 
 static int __init tz1090_pdc_pinctrl_init(void)
diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c
index 5425299..04cbe53 100644
--- a/drivers/pinctrl/pinctrl-tz1090.c
+++ b/drivers/pinctrl/pinctrl-tz1090.c
@@ -1962,7 +1962,8 @@
 	if (IS_ERR(pmx->regs))
 		return PTR_ERR(pmx->regs);
 
-	pmx->pctl = pinctrl_register(&tz1090_pinctrl_desc, &pdev->dev, pmx);
+	pmx->pctl = devm_pinctrl_register(&pdev->dev, &tz1090_pinctrl_desc,
+					  pmx);
 	if (IS_ERR(pmx->pctl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(pmx->pctl);
@@ -1975,15 +1976,6 @@
 	return 0;
 }
 
-static int tz1090_pinctrl_remove(struct platform_device *pdev)
-{
-	struct tz1090_pmx *pmx = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pmx->pctl);
-
-	return 0;
-}
-
 static const struct of_device_id tz1090_pinctrl_of_match[] = {
 	{ .compatible = "img,tz1090-pinctrl", },
 	{ },
@@ -1995,7 +1987,6 @@
 		.of_match_table	= tz1090_pinctrl_of_match,
 	},
 	.probe	= tz1090_pinctrl_probe,
-	.remove	= tz1090_pinctrl_remove,
 };
 
 static int __init tz1090_pinctrl_init(void)
diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c
index c076021..d1af908 100644
--- a/drivers/pinctrl/pinctrl-u300.c
+++ b/drivers/pinctrl/pinctrl-u300.c
@@ -1067,7 +1067,7 @@
 	if (IS_ERR(upmx->virtbase))
 		return PTR_ERR(upmx->virtbase);
 
-	upmx->pctl = pinctrl_register(&u300_pmx_desc, &pdev->dev, upmx);
+	upmx->pctl = devm_pinctrl_register(&pdev->dev, &u300_pmx_desc, upmx);
 	if (IS_ERR(upmx->pctl)) {
 		dev_err(&pdev->dev, "could not register U300 pinmux driver\n");
 		return PTR_ERR(upmx->pctl);
@@ -1080,15 +1080,6 @@
 	return 0;
 }
 
-static int u300_pmx_remove(struct platform_device *pdev)
-{
-	struct u300_pmx *upmx = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(upmx->pctl);
-
-	return 0;
-}
-
 static const struct of_device_id u300_pinctrl_match[] = {
 	{ .compatible = "stericsson,pinctrl-u300" },
 	{},
@@ -1101,7 +1092,6 @@
 		.of_match_table = u300_pinctrl_match,
 	},
 	.probe = u300_pmx_probe,
-	.remove = u300_pmx_remove,
 };
 
 static int __init u300_pmx_init(void)
diff --git a/drivers/pinctrl/pinctrl-utils.c b/drivers/pinctrl/pinctrl-utils.c
index d77693f..9189fba 100644
--- a/drivers/pinctrl/pinctrl-utils.c
+++ b/drivers/pinctrl/pinctrl-utils.c
@@ -122,7 +122,7 @@
 }
 EXPORT_SYMBOL_GPL(pinctrl_utils_add_config);
 
-void pinctrl_utils_dt_free_map(struct pinctrl_dev *pctldev,
+void pinctrl_utils_free_map(struct pinctrl_dev *pctldev,
 	      struct pinctrl_map *map, unsigned num_maps)
 {
 	int i;
@@ -139,4 +139,4 @@
 	}
 	kfree(map);
 }
-EXPORT_SYMBOL_GPL(pinctrl_utils_dt_free_map);
+EXPORT_SYMBOL_GPL(pinctrl_utils_free_map);
diff --git a/drivers/pinctrl/pinctrl-utils.h b/drivers/pinctrl/pinctrl-utils.h
index d0ffe1c..8f9f2d2 100644
--- a/drivers/pinctrl/pinctrl-utils.h
+++ b/drivers/pinctrl/pinctrl-utils.h
@@ -37,7 +37,7 @@
 int pinctrl_utils_add_config(struct pinctrl_dev *pctldev,
 		unsigned long **configs, unsigned *num_configs,
 		unsigned long config);
-void pinctrl_utils_dt_free_map(struct pinctrl_dev *pctldev,
+void pinctrl_utils_free_map(struct pinctrl_dev *pctldev,
 		struct pinctrl_map *map, unsigned num_maps);
 
 #endif /* __PINCTRL_UTILS_H__ */
diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c
index 76f1abd..8fdc60c 100644
--- a/drivers/pinctrl/pinctrl-zynq.c
+++ b/drivers/pinctrl/pinctrl-zynq.c
@@ -862,7 +862,7 @@
 	.get_group_name = zynq_pctrl_get_group_name,
 	.get_group_pins = zynq_pctrl_get_group_pins,
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_all,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 /* pinmux */
@@ -1195,7 +1195,7 @@
 	pctrl->funcs = zynq_pmux_functions;
 	pctrl->nfuncs = ARRAY_SIZE(zynq_pmux_functions);
 
-	pctrl->pctrl = pinctrl_register(&zynq_desc, &pdev->dev, pctrl);
+	pctrl->pctrl = devm_pinctrl_register(&pdev->dev, &zynq_desc, pctrl);
 	if (IS_ERR(pctrl->pctrl))
 		return PTR_ERR(pctrl->pctrl);
 
@@ -1206,15 +1206,6 @@
 	return 0;
 }
 
-static int zynq_pinctrl_remove(struct platform_device *pdev)
-{
-	struct zynq_pinctrl *pctrl = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pctrl->pctrl);
-
-	return 0;
-}
-
 static const struct of_device_id zynq_pinctrl_of_match[] = {
 	{ .compatible = "xlnx,pinctrl-zynq" },
 	{ }
@@ -1227,7 +1218,6 @@
 		.of_match_table = zynq_pinctrl_of_match,
 	},
 	.probe = zynq_pinctrl_probe,
-	.remove = zynq_pinctrl_remove,
 };
 
 static int __init zynq_pinctrl_init(void)
diff --git a/drivers/pinctrl/pxa/Kconfig b/drivers/pinctrl/pxa/Kconfig
index 990667f..c29bdcf 100644
--- a/drivers/pinctrl/pxa/Kconfig
+++ b/drivers/pinctrl/pxa/Kconfig
@@ -6,12 +6,20 @@
 	select PINCONF
 	select GENERIC_PINCONF
 
+config PINCTRL_PXA25X
+	tristate "Marvell PXA25x pin controller driver"
+	select PINCTRL_PXA
+	default y if PXA25x
+	help
+	  This is the pinctrl, pinmux, pinconf driver for the Marvell
+	  PXA2xx block found in the pxa25x platforms.
+
 config PINCTRL_PXA27X
 	tristate "Marvell PXA27x pin controller driver"
 	select PINCTRL_PXA
 	default y if PXA27x
 	help
 	  This is the pinctrl, pinmux, pinconf driver for the Marvell
-	  PXA2xx block found in the pxa25x and pxa27x platforms.
+	  PXA2xx block found in the pxa27x platforms.
 
 endif
diff --git a/drivers/pinctrl/pxa/Makefile b/drivers/pinctrl/pxa/Makefile
index f1d56af..ca2ade1 100644
--- a/drivers/pinctrl/pxa/Makefile
+++ b/drivers/pinctrl/pxa/Makefile
@@ -1,2 +1,3 @@
 # Marvell PXA pin control drivers
+obj-$(CONFIG_PINCTRL_PXA25X)	+= pinctrl-pxa2xx.o pinctrl-pxa25x.o
 obj-$(CONFIG_PINCTRL_PXA27X)	+= pinctrl-pxa2xx.o pinctrl-pxa27x.o
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa25x.c b/drivers/pinctrl/pxa/pinctrl-pxa25x.c
new file mode 100644
index 0000000..b98ecb3
--- /dev/null
+++ b/drivers/pinctrl/pxa/pinctrl-pxa25x.c
@@ -0,0 +1,274 @@
+/*
+ * Marvell PXA25x family pin control
+ *
+ * Copyright (C) 2016 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ */
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/pinctrl.h>
+
+#include "pinctrl-pxa2xx.h"
+
+static const struct pxa_desc_pin pxa25x_pins[] = {
+	PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(0)),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(1),
+		     PXA_FUNCTION(0, 1, "GP_RST")),
+	PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(2)),
+	PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(3)),
+	PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(4)),
+	PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(5)),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(6),
+		     PXA_FUNCTION(1, 1, "MMCCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(7),
+		     PXA_FUNCTION(1, 1, "48_MHz")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(8),
+		     PXA_FUNCTION(1, 1, "MMCCS0")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(9),
+		     PXA_FUNCTION(1, 1, "MMCCS1")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(10),
+		     PXA_FUNCTION(1, 1, "RTCCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(11),
+		     PXA_FUNCTION(1, 1, "3_6_MHz")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(12),
+		     PXA_FUNCTION(1, 1, "32_kHz")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(13),
+		     PXA_FUNCTION(1, 2, "MBGNT")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(14),
+		     PXA_FUNCTION(0, 1, "MBREQ")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(15),
+		     PXA_FUNCTION(1, 2, "nCS_1")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(16),
+		     PXA_FUNCTION(1, 2, "PWM0")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(17),
+		     PXA_FUNCTION(1, 2, "PWM1")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(18),
+		     PXA_FUNCTION(0, 1, "RDY")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(19),
+		     PXA_FUNCTION(0, 1, "DREQ[1]")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(20),
+		     PXA_FUNCTION(0, 1, "DREQ[0]")),
+	PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(21)),
+	PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(22)),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(23),
+		     PXA_FUNCTION(1, 2, "SCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(24),
+		     PXA_FUNCTION(1, 2, "SFRM")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(25),
+		     PXA_FUNCTION(1, 2, "TXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(26),
+		     PXA_FUNCTION(0, 1, "RXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(27),
+		     PXA_FUNCTION(0, 1, "EXTCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(28),
+		     PXA_FUNCTION(0, 1, "BITCLK"),
+		     PXA_FUNCTION(0, 2, "BITCLK"),
+		     PXA_FUNCTION(1, 1, "BITCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(29),
+		     PXA_FUNCTION(0, 1, "SDATA_IN0"),
+		     PXA_FUNCTION(0, 2, "SDATA_IN")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(30),
+		     PXA_FUNCTION(1, 1, "SDATA_OUT"),
+		     PXA_FUNCTION(1, 2, "SDATA_OUT")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(31),
+		     PXA_FUNCTION(1, 1, "SYNC"),
+		     PXA_FUNCTION(1, 2, "SYNC")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(32),
+		     PXA_FUNCTION(0, 1, "SDATA_IN1"),
+		     PXA_FUNCTION(1, 1, "SYSCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(33),
+		     PXA_FUNCTION(1, 2, "nCS[5]")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(34),
+		     PXA_FUNCTION(0, 1, "FFRXD"),
+		     PXA_FUNCTION(1, 2, "MMCCS0")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(35),
+		     PXA_FUNCTION(0, 1, "CTS")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(36),
+		     PXA_FUNCTION(0, 1, "DCD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(37),
+		     PXA_FUNCTION(0, 1, "DSR")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(38),
+		     PXA_FUNCTION(0, 1, "RI")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(39),
+		     PXA_FUNCTION(1, 1, "MMCCS1"),	/* same name as pins 9 and 68 */
+		     PXA_FUNCTION(1, 2, "FFTXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(40),
+		     PXA_FUNCTION(1, 2, "DTR")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(41),
+		     PXA_FUNCTION(1, 2, "RTS")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(42),
+		     PXA_FUNCTION(0, 1, "BTRXD"),
+		     PXA_FUNCTION(0, 3, "HWRXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(43),
+		     PXA_FUNCTION(1, 2, "BTTXD"),
+		     PXA_FUNCTION(1, 3, "HWTXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(44),
+		     PXA_FUNCTION(0, 1, "BTCTS"),
+		     PXA_FUNCTION(0, 3, "HWCTS")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(45),
+		     PXA_FUNCTION(1, 2, "BTRTS"),
+		     PXA_FUNCTION(1, 3, "HWRTS")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(46),
+		     PXA_FUNCTION(0, 1, "ICP_RXD"),
+		     PXA_FUNCTION(0, 2, "RXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(47),
+		     PXA_FUNCTION(1, 1, "TXD"),
+		     PXA_FUNCTION(1, 2, "ICP_TXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(48),
+		     PXA_FUNCTION(1, 1, "HWTXD"),
+		     PXA_FUNCTION(1, 2, "nPOE")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(49),
+		     PXA_FUNCTION(0, 1, "HWRXD"),
+		     PXA_FUNCTION(1, 2, "nPWE")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(50),
+		     PXA_FUNCTION(0, 1, "HWCTS"),
+		     PXA_FUNCTION(1, 2, "nPIOR")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(51),
+		     PXA_FUNCTION(1, 1, "HWRTS"),
+		     PXA_FUNCTION(1, 2, "nPIOW")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(52),
+		     PXA_FUNCTION(1, 2, "nPCE[1]")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(53),
+		     PXA_FUNCTION(1, 1, "MMCCLK"),
+		     PXA_FUNCTION(1, 2, "nPCE[2]")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(54),
+		     PXA_FUNCTION(1, 1, "MMCCLK"),
+		     PXA_FUNCTION(1, 2, "nPSKTSEL")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(55),
+		     PXA_FUNCTION(1, 2, "nPREG")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(56),
+		     PXA_FUNCTION(0, 1, "nPWAIT")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(57),
+		     PXA_FUNCTION(0, 1, "nIOIS16")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(58),
+		     PXA_FUNCTION(1, 2, "LDD<0>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(59),
+		     PXA_FUNCTION(1, 2, "LDD<1>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(60),
+		     PXA_FUNCTION(1, 2, "LDD<2>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(61),
+		     PXA_FUNCTION(1, 2, "LDD<3>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(62),
+		     PXA_FUNCTION(1, 2, "LDD<4>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(63),
+		     PXA_FUNCTION(1, 2, "LDD<5>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(64),
+		     PXA_FUNCTION(1, 2, "LDD<6>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(65),
+		     PXA_FUNCTION(1, 2, "LDD<7>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(66),
+		     PXA_FUNCTION(0, 1, "MBREQ"),
+		     PXA_FUNCTION(1, 2, "LDD<8>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(67),
+		     PXA_FUNCTION(1, 1, "MMCCS0"),
+		     PXA_FUNCTION(1, 2, "LDD<9>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(68),
+		     PXA_FUNCTION(1, 1, "MMCCS1"),
+		     PXA_FUNCTION(1, 2, "LDD<10>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(69),
+		     PXA_FUNCTION(1, 1, "MMCCLK"),
+		     PXA_FUNCTION(1, 2, "LDD<11>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(70),
+		     PXA_FUNCTION(1, 1, "RTCCLK"),
+		     PXA_FUNCTION(1, 2, "LDD<12>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(71),
+		     PXA_FUNCTION(1, 1, "3_6_MHz"),
+		     PXA_FUNCTION(1, 2, "LDD<13>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(72),
+		     PXA_FUNCTION(1, 1, "32_kHz"),
+		     PXA_FUNCTION(1, 2, "LDD<14>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(73),
+		     PXA_FUNCTION(1, 1, "MBGNT"),
+		     PXA_FUNCTION(1, 2, "LDD<15>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(74),
+		     PXA_FUNCTION(1, 2, "LCD_FCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(75),
+		     PXA_FUNCTION(1, 2, "LCD_LCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(76),
+		     PXA_FUNCTION(1, 2, "LCD_PCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(77),
+		     PXA_FUNCTION(1, 2, "LCD_ACBIAS")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(78),
+		     PXA_FUNCTION(1, 2, "nCS<2>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(79),
+		     PXA_FUNCTION(1, 2, "nCS<3>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(80),
+		     PXA_FUNCTION(1, 2, "nCS<4>")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(81),
+		     PXA_FUNCTION(0, 1, "NSSPSCLK"),
+		     PXA_FUNCTION(1, 1, "NSSPSCLK")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(82),
+		     PXA_FUNCTION(0, 1, "NSSPSFRM"),
+		     PXA_FUNCTION(1, 1, "NSSPSFRM")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(83),
+		     PXA_FUNCTION(0, 2, "NSSPRXD"),
+		     PXA_FUNCTION(1, 1, "NSSPTXD")),
+	PXA_GPIO_PIN(PXA_PINCTRL_PIN(84),
+		     PXA_FUNCTION(0, 2, "NSSPRXD"),
+		     PXA_FUNCTION(1, 1, "NSSPTXD")),
+};
+
+/* Map the four MEM resources and pass the register banks to the pxa2xx core. */
+static int pxa25x_pinctrl_probe(struct platform_device *pdev)
+{
+	unsigned int i;
+	void __iomem *base_af[8];
+	void __iomem *base_dir[4];
+	void __iomem *base_sleep[4];
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base_af[0] = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base_af[0]))
+		return PTR_ERR(base_af[0]);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	base_dir[0] = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base_dir[0]))
+		return PTR_ERR(base_dir[0]);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	base_dir[3] = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base_dir[3]))
+		return PTR_ERR(base_dir[3]);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+	base_sleep[0] = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base_sleep[0]))
+		return PTR_ERR(base_sleep[0]);
+	/* Stride is the 32-bit register width, not the size of a pointer. */
+	for (i = 0; i < ARRAY_SIZE(base_af); i++)
+		base_af[i] = base_af[0] + sizeof(u32) * i;
+	for (i = 0; i < 3; i++)	/* base_dir[3] has its own resource above */
+		base_dir[i] = base_dir[0] + sizeof(u32) * i;
+	for (i = 0; i < ARRAY_SIZE(base_sleep); i++)
+		base_sleep[i] = base_sleep[0] + sizeof(u32) * i;
+
+	return pxa2xx_pinctrl_init(pdev, pxa25x_pins, ARRAY_SIZE(pxa25x_pins),
+				   base_af, base_dir, base_sleep);
+}
+
+static const struct of_device_id pxa25x_pinctrl_match[] = {
+	{ .compatible = "marvell,pxa25x-pinctrl", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, pxa25x_pinctrl_match);
+
+static struct platform_driver pxa25x_pinctrl_driver = {
+	.probe	= pxa25x_pinctrl_probe,
+	.driver	= {
+		.name		= "pxa25x-pinctrl",
+		.of_match_table	= pxa25x_pinctrl_match,
+	},
+};
+module_platform_driver(pxa25x_pinctrl_driver);
+
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_DESCRIPTION("Marvell PXA25x pinctrl driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
index f553313..866aa3c 100644
--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
+++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
@@ -57,7 +57,7 @@
 static const struct pinctrl_ops pxa2xx_pctl_ops = {
 #ifdef CONFIG_OF
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_all,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 #endif
 	.get_groups_count	= pxa2xx_pctrl_get_groups_count,
 	.get_group_name		= pxa2xx_pctrl_get_group_name,
@@ -416,7 +416,7 @@
 	if (ret)
 		return ret;
 
-	pctl->pctl_dev = pinctrl_register(&pctl->desc, &pdev->dev, pctl);
+	pctl->pctl_dev = devm_pinctrl_register(&pdev->dev, &pctl->desc, pctl);
 	if (IS_ERR(pctl->pctl_dev)) {
 		dev_err(&pdev->dev, "couldn't register pinctrl driver\n");
 		return PTR_ERR(pctl->pctl_dev);
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 8777cf0..1a44e1d 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -101,7 +101,7 @@
 	.get_group_name		= msm_get_group_name,
 	.get_group_pins		= msm_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_group,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static int msm_get_functions_count(struct pinctrl_dev *pctldev)
@@ -898,17 +898,16 @@
 	msm_pinctrl_desc.name = dev_name(&pdev->dev);
 	msm_pinctrl_desc.pins = pctrl->soc->pins;
 	msm_pinctrl_desc.npins = pctrl->soc->npins;
-	pctrl->pctrl = pinctrl_register(&msm_pinctrl_desc, &pdev->dev, pctrl);
+	pctrl->pctrl = devm_pinctrl_register(&pdev->dev, &msm_pinctrl_desc,
+					     pctrl);
 	if (IS_ERR(pctrl->pctrl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(pctrl->pctrl);
 	}
 
 	ret = msm_gpio_init(pctrl);
-	if (ret) {
-		pinctrl_unregister(pctrl->pctrl);
+	if (ret)
 		return ret;
-	}
 
 	platform_set_drvdata(pdev, pctrl);
 
@@ -923,7 +922,6 @@
 	struct msm_pinctrl *pctrl = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&pctrl->chip);
-	pinctrl_unregister(pctrl->pctrl);
 
 	unregister_restart_handler(&pctrl->restart_nb);
 
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index 4e12ded..686accb 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -212,7 +212,7 @@
 	.get_group_name		= pmic_gpio_get_group_name,
 	.get_group_pins		= pmic_gpio_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_group,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static int pmic_gpio_get_functions_count(struct pinctrl_dev *pctldev)
@@ -764,14 +764,14 @@
 	state->chip.of_gpio_n_cells = 2;
 	state->chip.can_sleep = false;
 
-	state->ctrl = pinctrl_register(pctrldesc, dev, state);
+	state->ctrl = devm_pinctrl_register(dev, pctrldesc, state);
 	if (IS_ERR(state->ctrl))
 		return PTR_ERR(state->ctrl);
 
 	ret = gpiochip_add_data(&state->chip, state);
 	if (ret) {
 		dev_err(state->dev, "can't add gpio chip\n");
-		goto err_chip;
+		return ret;
 	}
 
 	ret = gpiochip_add_pin_range(&state->chip, dev_name(dev), 0, 0, npins);
@@ -784,8 +784,6 @@
 
 err_range:
 	gpiochip_remove(&state->chip);
-err_chip:
-	pinctrl_unregister(state->ctrl);
 	return ret;
 }
 
@@ -794,7 +792,6 @@
 	struct pmic_gpio_state *state = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&state->chip);
-	pinctrl_unregister(state->ctrl);
 	return 0;
 }
 
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
index 2a3e549..1735ffe 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
@@ -235,7 +235,7 @@
 	.get_group_name		= pmic_mpp_get_group_name,
 	.get_group_pins		= pmic_mpp_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_group,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static int pmic_mpp_get_functions_count(struct pinctrl_dev *pctldev)
@@ -877,14 +877,14 @@
 	state->chip.of_gpio_n_cells = 2;
 	state->chip.can_sleep = false;
 
-	state->ctrl = pinctrl_register(pctrldesc, dev, state);
+	state->ctrl = devm_pinctrl_register(dev, pctrldesc, state);
 	if (IS_ERR(state->ctrl))
 		return PTR_ERR(state->ctrl);
 
 	ret = gpiochip_add_data(&state->chip, state);
 	if (ret) {
 		dev_err(state->dev, "can't add gpio chip\n");
-		goto err_chip;
+		return ret;
 	}
 
 	ret = gpiochip_add_pin_range(&state->chip, dev_name(dev), 0, 0, npins);
@@ -897,8 +897,6 @@
 
 err_range:
 	gpiochip_remove(&state->chip);
-err_chip:
-	pinctrl_unregister(state->ctrl);
 	return ret;
 }
 
@@ -907,7 +905,6 @@
 	struct pmic_mpp_state *state = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&state->chip);
-	pinctrl_unregister(state->ctrl);
 	return 0;
 }
 
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index cd8580d..d3f5501d 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -200,7 +200,7 @@
 	.get_group_name		= pm8xxx_get_group_name,
 	.get_group_pins         = pm8xxx_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_group,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static int pm8xxx_get_functions_count(struct pinctrl_dev *pctldev)
@@ -729,7 +729,7 @@
 	pctrl->desc.custom_conf_items = pm8xxx_conf_items;
 #endif
 
-	pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl);
+	pctrl->pctrl = devm_pinctrl_register(&pdev->dev, &pctrl->desc, pctrl);
 	if (IS_ERR(pctrl->pctrl)) {
 		dev_err(&pdev->dev, "couldn't register pm8xxx gpio driver\n");
 		return PTR_ERR(pctrl->pctrl);
@@ -745,7 +745,7 @@
 	ret = gpiochip_add_data(&pctrl->chip, pctrl);
 	if (ret) {
 		dev_err(&pdev->dev, "failed register gpiochip\n");
-		goto unregister_pinctrl;
+		return ret;
 	}
 
 	ret = gpiochip_add_pin_range(&pctrl->chip,
@@ -765,9 +765,6 @@
 unregister_gpiochip:
 	gpiochip_remove(&pctrl->chip);
 
-unregister_pinctrl:
-	pinctrl_unregister(pctrl->pctrl);
-
 	return ret;
 }
 
@@ -777,8 +774,6 @@
 
 	gpiochip_remove(&pctrl->chip);
 
-	pinctrl_unregister(pctrl->pctrl);
-
 	return 0;
 }
 
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
index 54a5402..9191727 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
@@ -277,7 +277,7 @@
 	.get_group_name		= pm8xxx_get_group_name,
 	.get_group_pins         = pm8xxx_get_group_pins,
 	.dt_node_to_map		= pinconf_generic_dt_node_to_map_group,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 };
 
 static int pm8xxx_get_functions_count(struct pinctrl_dev *pctldev)
@@ -820,7 +820,7 @@
 	pctrl->desc.custom_conf_items = pm8xxx_conf_items;
 #endif
 
-	pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl);
+	pctrl->pctrl = devm_pinctrl_register(&pdev->dev, &pctrl->desc, pctrl);
 	if (IS_ERR(pctrl->pctrl)) {
 		dev_err(&pdev->dev, "couldn't register pm8xxx mpp driver\n");
 		return PTR_ERR(pctrl->pctrl);
@@ -836,7 +836,7 @@
 	ret = gpiochip_add_data(&pctrl->chip, pctrl);
 	if (ret) {
 		dev_err(&pdev->dev, "failed register gpiochip\n");
-		goto unregister_pinctrl;
+		return ret;
 	}
 
 	ret = gpiochip_add_pin_range(&pctrl->chip,
@@ -856,9 +856,6 @@
 unregister_gpiochip:
 	gpiochip_remove(&pctrl->chip);
 
-unregister_pinctrl:
-	pinctrl_unregister(pctrl->pctrl);
-
 	return ret;
 }
 
@@ -868,8 +865,6 @@
 
 	gpiochip_remove(&pctrl->chip);
 
-	pinctrl_unregister(pctrl->pctrl);
-
 	return 0;
 }
 
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos5440.c b/drivers/pinctrl/samsung/pinctrl-exynos5440.c
index 00ab63a..fb71fc3 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos5440.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos5440.c
@@ -107,6 +107,7 @@
  * @nr_groups: number of pin groups available.
  * @pmx_functions: list of pin functions parsed from device tree.
  * @nr_functions: number of pin functions available.
+ * @range: gpio range to register with pinctrl
  */
 struct exynos5440_pinctrl_priv_data {
 	void __iomem			*reg_base;
@@ -117,6 +118,7 @@
 	unsigned int			nr_groups;
 	const struct exynos5440_pmx_func	*pmx_functions;
 	unsigned int			nr_functions;
+	struct pinctrl_gpio_range	range;
 };
 
 /**
@@ -742,7 +744,6 @@
 	struct pinctrl_desc *ctrldesc;
 	struct pinctrl_dev *pctl_dev;
 	struct pinctrl_pin_desc *pindesc, *pdesc;
-	struct pinctrl_gpio_range grange;
 	char *pin_names;
 	int pin, ret;
 
@@ -788,18 +789,18 @@
 	if (ret)
 		return ret;
 
-	pctl_dev = pinctrl_register(ctrldesc, &pdev->dev, priv);
+	pctl_dev = devm_pinctrl_register(&pdev->dev, ctrldesc, priv);
 	if (IS_ERR(pctl_dev)) {
 		dev_err(&pdev->dev, "could not register pinctrl driver\n");
 		return PTR_ERR(pctl_dev);
 	}
 
-	grange.name = "exynos5440-pctrl-gpio-range";
-	grange.id = 0;
-	grange.base = 0;
-	grange.npins = EXYNOS5440_MAX_PINS;
-	grange.gc = priv->gc;
-	pinctrl_add_gpio_range(pctl_dev, &grange);
+	priv->range.name = "exynos5440-pctrl-gpio-range";
+	priv->range.id = 0;
+	priv->range.base = 0;
+	priv->range.npins = EXYNOS5440_MAX_PINS;
+	priv->range.gc = priv->gc;
+	pinctrl_add_gpio_range(pctl_dev, &priv->range);
 	return 0;
 }
 
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c
index 5cc97f8..ed0b708 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
@@ -884,7 +884,8 @@
 	if (ret)
 		return ret;
 
-	drvdata->pctl_dev = pinctrl_register(ctrldesc, &pdev->dev, drvdata);
+	drvdata->pctl_dev = devm_pinctrl_register(&pdev->dev, ctrldesc,
+						  drvdata);
 	if (IS_ERR(drvdata->pctl_dev)) {
 		dev_err(&pdev->dev, "could not register pinctrl driver\n");
 		return PTR_ERR(drvdata->pctl_dev);
diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c
index ee0c1f2..9b9cee0 100644
--- a/drivers/pinctrl/sh-pfc/core.c
+++ b/drivers/pinctrl/sh-pfc/core.c
@@ -175,6 +175,21 @@
 	BUG();
 }
 
+u32 sh_pfc_read_reg(struct sh_pfc *pfc, u32 reg, unsigned int width)
+{
+	return sh_pfc_read_raw_reg(sh_pfc_phys_to_virt(pfc, reg), width);
+}
+
+void sh_pfc_write_reg(struct sh_pfc *pfc, u32 reg, unsigned int width, u32 data)
+{
+	if (pfc->info->unlock_reg)
+		sh_pfc_write_raw_reg(
+			sh_pfc_phys_to_virt(pfc, pfc->info->unlock_reg), 32,
+			~data);
+
+	sh_pfc_write_raw_reg(sh_pfc_phys_to_virt(pfc, reg), width, data);
+}
+
 static void sh_pfc_config_reg_helper(struct sh_pfc *pfc,
 				     const struct pinmux_cfg_reg *crp,
 				     unsigned int in_pos,
@@ -585,12 +600,9 @@
 
 static int sh_pfc_remove(struct platform_device *pdev)
 {
-	struct sh_pfc *pfc = platform_get_drvdata(pdev);
-
 #ifdef CONFIG_PINCTRL_SH_PFC_GPIO
-	sh_pfc_unregister_gpiochip(pfc);
+	sh_pfc_unregister_gpiochip(platform_get_drvdata(pdev));
 #endif
-	sh_pfc_unregister_pinctrl(pfc);
 
 	return 0;
 }
diff --git a/drivers/pinctrl/sh-pfc/core.h b/drivers/pinctrl/sh-pfc/core.h
index 62f53b2..dc1b2ad 100644
--- a/drivers/pinctrl/sh-pfc/core.h
+++ b/drivers/pinctrl/sh-pfc/core.h
@@ -50,18 +50,19 @@
 	struct sh_pfc_chip *func;
 #endif
 
-	struct sh_pfc_pinctrl *pinctrl;
 };
 
 int sh_pfc_register_gpiochip(struct sh_pfc *pfc);
 int sh_pfc_unregister_gpiochip(struct sh_pfc *pfc);
 
 int sh_pfc_register_pinctrl(struct sh_pfc *pfc);
-int sh_pfc_unregister_pinctrl(struct sh_pfc *pfc);
 
 u32 sh_pfc_read_raw_reg(void __iomem *mapped_reg, unsigned int reg_width);
 void sh_pfc_write_raw_reg(void __iomem *mapped_reg, unsigned int reg_width,
 			  u32 data);
+u32 sh_pfc_read_reg(struct sh_pfc *pfc, u32 reg, unsigned int width);
+void sh_pfc_write_reg(struct sh_pfc *pfc, u32 reg, unsigned int width,
+		      u32 data);
 
 int sh_pfc_get_pin_index(struct sh_pfc *pfc, unsigned int pin);
 int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type);
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7790.c b/drivers/pinctrl/sh-pfc/pfc-r8a7790.c
index 0f4d48f..eed8daa 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7790.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7790.c
@@ -21,16 +21,21 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
+#include <linux/io.h>
 #include <linux/kernel.h>
 
 #include "core.h"
 #include "sh_pfc.h"
 
+/*
+ * All pins assigned to GPIO bank 3 can be used for SD interfaces in
+ * which case they support both 3.3V and 1.8V signalling.
+ */
 #define CPU_ALL_PORT(fn, sfx)						\
 	PORT_GP_32(0, fn, sfx),						\
 	PORT_GP_30(1, fn, sfx),						\
 	PORT_GP_30(2, fn, sfx),						\
-	PORT_GP_32(3, fn, sfx),						\
+	PORT_GP_CFG_32(3, fn, sfx, SH_PFC_PIN_CFG_IO_VOLTAGE),		\
 	PORT_GP_32(4, fn, sfx),						\
 	PORT_GP_32(5, fn, sfx)
 
@@ -4691,6 +4696,47 @@
 	"vin3_clk",
 };
 
+#define IOCTRL6 0x8c
+
+static int r8a7790_get_io_voltage(struct sh_pfc *pfc, unsigned int pin)
+{
+	u32 data, mask;
+
+	if (WARN(pin < RCAR_GP_PIN(3, 0) || pin > RCAR_GP_PIN(3, 31), "invalid pin %#x", pin))
+		return -EINVAL;
+
+	data = ioread32(pfc->windows->virt + IOCTRL6);
+	/* Bits in IOCTRL6 are numbered in opposite order to pins */
+	mask = 0x80000000 >> (pin & 0x1f);
+
+	return (data & mask) ? 3300 : 1800;	/* millivolts; bit set means 3.3V */
+}
+
+static int r8a7790_set_io_voltage(struct sh_pfc *pfc, unsigned int pin, u16 mV)
+{
+	u32 data, mask;
+
+	if (WARN(pin < RCAR_GP_PIN(3, 0) || pin > RCAR_GP_PIN(3, 31), "invalid pin %#x", pin))
+		return -EINVAL;
+
+	if (mV != 1800 && mV != 3300)	/* only these two levels are accepted */
+		return -EINVAL;
+
+	data = ioread32(pfc->windows->virt + IOCTRL6);
+	/* Bits in IOCTRL6 are numbered in opposite order to pins */
+	mask = 0x80000000 >> (pin & 0x1f);
+
+	if (mV == 3300)	/* bit set selects 3.3V, bit clear selects 1.8V */
+		data |= mask;
+	else
+		data &= ~mask;
+
+	iowrite32(~data, pfc->windows->virt); /* unlock: inverted value to window offset 0 (presumably PMMR) first */
+	iowrite32(data, pfc->windows->virt + IOCTRL6);
+
+	return 0;
+}
+
 static const struct sh_pfc_function pinmux_functions[] = {
 	SH_PFC_FUNCTION(audio_clk),
 	SH_PFC_FUNCTION(avb),
@@ -5690,8 +5736,14 @@
 	{ },
 };
 
+static const struct sh_pfc_soc_operations pinmux_ops = {
+	.get_io_voltage = r8a7790_get_io_voltage,
+	.set_io_voltage = r8a7790_set_io_voltage,
+};
+
 const struct sh_pfc_soc_info r8a7790_pinmux_info = {
 	.name = "r8a77900_pfc",
+	.ops = &pinmux_ops,
 	.unlock_reg = 0xe6060000, /* PMMR */
 
 	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c
index 38912cf..8bc2cf0c 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c
@@ -1682,6 +1682,179 @@
 static const unsigned int avb_avtp_match_b_mux[] = {
 	AVB_AVTP_MATCH_B_MARK,
 };
+/* - DU --------------------------------------------------------------------- */
+static const unsigned int du0_rgb666_pins[] = {
+	/* R[7:2], G[7:2], B[7:2] */
+	RCAR_GP_PIN(2, 7),  RCAR_GP_PIN(2, 6),  RCAR_GP_PIN(2, 5),
+	RCAR_GP_PIN(2, 4),  RCAR_GP_PIN(2, 3),  RCAR_GP_PIN(2, 2),
+	RCAR_GP_PIN(2, 15), RCAR_GP_PIN(2, 14), RCAR_GP_PIN(2, 13),
+	RCAR_GP_PIN(2, 12), RCAR_GP_PIN(2, 11), RCAR_GP_PIN(2, 10),
+	RCAR_GP_PIN(2, 23), RCAR_GP_PIN(2, 22), RCAR_GP_PIN(2, 21),
+	RCAR_GP_PIN(2, 20), RCAR_GP_PIN(2, 19), RCAR_GP_PIN(2, 18),
+};
+static const unsigned int du0_rgb666_mux[] = {
+	DU0_DR7_MARK, DU0_DR6_MARK, DU0_DR5_MARK, DU0_DR4_MARK,
+	DU0_DR3_MARK, DU0_DR2_MARK,
+	DU0_DG7_MARK, DU0_DG6_MARK, DU0_DG5_MARK, DU0_DG4_MARK,
+	DU0_DG3_MARK, DU0_DG2_MARK,
+	DU0_DB7_MARK, DU0_DB6_MARK, DU0_DB5_MARK, DU0_DB4_MARK,
+	DU0_DB3_MARK, DU0_DB2_MARK,
+};
+static const unsigned int du0_rgb888_pins[] = {
+	/* R[7:0], G[7:0], B[7:0] */
+	RCAR_GP_PIN(2, 7),  RCAR_GP_PIN(2, 6),  RCAR_GP_PIN(2, 5),
+	RCAR_GP_PIN(2, 4),  RCAR_GP_PIN(2, 3),  RCAR_GP_PIN(2, 2),
+	RCAR_GP_PIN(2, 1),  RCAR_GP_PIN(2, 0),
+	RCAR_GP_PIN(2, 15), RCAR_GP_PIN(2, 14), RCAR_GP_PIN(2, 13),
+	RCAR_GP_PIN(2, 12), RCAR_GP_PIN(2, 11), RCAR_GP_PIN(2, 10),
+	RCAR_GP_PIN(2, 9),  RCAR_GP_PIN(2, 8),
+	RCAR_GP_PIN(2, 23), RCAR_GP_PIN(2, 22), RCAR_GP_PIN(2, 21),
+	RCAR_GP_PIN(2, 20), RCAR_GP_PIN(2, 19), RCAR_GP_PIN(2, 18),
+	RCAR_GP_PIN(2, 17), RCAR_GP_PIN(2, 16),
+};
+static const unsigned int du0_rgb888_mux[] = {
+	DU0_DR7_MARK, DU0_DR6_MARK, DU0_DR5_MARK, DU0_DR4_MARK,
+	DU0_DR3_MARK, DU0_DR2_MARK, DU0_DR1_MARK, DU0_DR0_MARK,
+	DU0_DG7_MARK, DU0_DG6_MARK, DU0_DG5_MARK, DU0_DG4_MARK,
+	DU0_DG3_MARK, DU0_DG2_MARK, DU0_DG1_MARK, DU0_DG0_MARK,
+	DU0_DB7_MARK, DU0_DB6_MARK, DU0_DB5_MARK, DU0_DB4_MARK,
+	DU0_DB3_MARK, DU0_DB2_MARK, DU0_DB1_MARK, DU0_DB0_MARK,
+};
+static const unsigned int du0_clk0_out_pins[] = {
+	/* DOTCLKOUT0 */
+	RCAR_GP_PIN(2, 25),
+};
+static const unsigned int du0_clk0_out_mux[] = {
+	DU0_DOTCLKOUT0_MARK
+};
+static const unsigned int du0_clk1_out_pins[] = {
+	/* DOTCLKOUT1 */
+	RCAR_GP_PIN(2, 26),
+};
+static const unsigned int du0_clk1_out_mux[] = {
+	DU0_DOTCLKOUT1_MARK
+};
+static const unsigned int du0_clk_in_pins[] = {
+	/* CLKIN */
+	RCAR_GP_PIN(2, 24),
+};
+static const unsigned int du0_clk_in_mux[] = {
+	DU0_DOTCLKIN_MARK
+};
+static const unsigned int du0_sync_pins[] = {
+	/* EXVSYNC/VSYNC, EXHSYNC/HSYNC */
+	RCAR_GP_PIN(2, 28), RCAR_GP_PIN(2, 27),
+};
+static const unsigned int du0_sync_mux[] = {
+	DU0_EXVSYNC_DU0_VSYNC_MARK, DU0_EXHSYNC_DU0_HSYNC_MARK
+};
+static const unsigned int du0_oddf_pins[] = {
+	/* EXODDF/ODDF/DISP/CDE */
+	RCAR_GP_PIN(2, 29),
+};
+static const unsigned int du0_oddf_mux[] = {
+	DU0_EXODDF_DU0_ODDF_DISP_CDE_MARK,
+};
+static const unsigned int du0_cde_pins[] = {
+	/* CDE */
+	RCAR_GP_PIN(2, 31),
+};
+static const unsigned int du0_cde_mux[] = {
+	DU0_CDE_MARK,
+};
+static const unsigned int du0_disp_pins[] = {
+	/* DISP */
+	RCAR_GP_PIN(2, 30),
+};
+static const unsigned int du0_disp_mux[] = {
+	DU0_DISP_MARK
+};
+static const unsigned int du1_rgb666_pins[] = {
+	/* R[7:2], G[7:2], B[7:2] */
+	RCAR_GP_PIN(4, 7),  RCAR_GP_PIN(4, 6),  RCAR_GP_PIN(4, 5),
+	RCAR_GP_PIN(4, 4),  RCAR_GP_PIN(4, 3),  RCAR_GP_PIN(4, 2),
+	RCAR_GP_PIN(4, 15), RCAR_GP_PIN(4, 14), RCAR_GP_PIN(4, 13),
+	RCAR_GP_PIN(4, 12), RCAR_GP_PIN(4, 11), RCAR_GP_PIN(4, 10),
+	RCAR_GP_PIN(4, 23), RCAR_GP_PIN(4, 22), RCAR_GP_PIN(4, 21),
+	RCAR_GP_PIN(4, 20), RCAR_GP_PIN(4, 19), RCAR_GP_PIN(4, 18),
+};
+static const unsigned int du1_rgb666_mux[] = {
+	DU1_DR7_MARK, DU1_DR6_MARK, DU1_DR5_MARK, DU1_DR4_MARK,
+	DU1_DR3_MARK, DU1_DR2_MARK,
+	DU1_DG7_MARK, DU1_DG6_MARK, DU1_DG5_MARK, DU1_DG4_MARK,
+	DU1_DG3_MARK, DU1_DG2_MARK,
+	DU1_DB7_MARK, DU1_DB6_MARK, DU1_DB5_MARK, DU1_DB4_MARK,
+	DU1_DB3_MARK, DU1_DB2_MARK,
+};
+static const unsigned int du1_rgb888_pins[] = {
+	/* R[7:0], G[7:0], B[7:0] */
+	RCAR_GP_PIN(4, 7),  RCAR_GP_PIN(4, 6),  RCAR_GP_PIN(4, 5),
+	RCAR_GP_PIN(4, 4),  RCAR_GP_PIN(4, 3),  RCAR_GP_PIN(4, 2),
+	RCAR_GP_PIN(4, 1),  RCAR_GP_PIN(4, 0),
+	RCAR_GP_PIN(4, 15), RCAR_GP_PIN(4, 14), RCAR_GP_PIN(4, 13),
+	RCAR_GP_PIN(4, 12), RCAR_GP_PIN(4, 11), RCAR_GP_PIN(4, 10),
+	RCAR_GP_PIN(4, 9),  RCAR_GP_PIN(4, 8),
+	RCAR_GP_PIN(4, 23), RCAR_GP_PIN(4, 22), RCAR_GP_PIN(4, 21),
+	RCAR_GP_PIN(4, 20), RCAR_GP_PIN(4, 19), RCAR_GP_PIN(4, 18),
+	RCAR_GP_PIN(4, 17), RCAR_GP_PIN(4, 16),
+};
+static const unsigned int du1_rgb888_mux[] = {
+	DU1_DR7_MARK, DU1_DR6_MARK, DU1_DR5_MARK, DU1_DR4_MARK,
+	DU1_DR3_MARK, DU1_DR2_MARK, DU1_DR1_MARK, DU1_DR0_MARK,
+	DU1_DG7_MARK, DU1_DG6_MARK, DU1_DG5_MARK, DU1_DG4_MARK,
+	DU1_DG3_MARK, DU1_DG2_MARK, DU1_DG1_MARK, DU1_DG0_MARK,
+	DU1_DB7_MARK, DU1_DB6_MARK, DU1_DB5_MARK, DU1_DB4_MARK,
+	DU1_DB3_MARK, DU1_DB2_MARK, DU1_DB1_MARK, DU1_DB0_MARK,
+};
+static const unsigned int du1_clk0_out_pins[] = {
+	/* DOTCLKOUT0 */
+	RCAR_GP_PIN(4, 25),
+};
+static const unsigned int du1_clk0_out_mux[] = {
+	DU1_DOTCLKOUT0_MARK
+};
+static const unsigned int du1_clk1_out_pins[] = {
+	/* DOTCLKOUT1 */
+	RCAR_GP_PIN(4, 26),
+};
+static const unsigned int du1_clk1_out_mux[] = {
+	DU1_DOTCLKOUT1_MARK
+};
+static const unsigned int du1_clk_in_pins[] = {
+	/* DOTCLKIN */
+	RCAR_GP_PIN(4, 24),
+};
+static const unsigned int du1_clk_in_mux[] = {
+	DU1_DOTCLKIN_MARK
+};
+static const unsigned int du1_sync_pins[] = {
+	/* EXVSYNC/VSYNC, EXHSYNC/HSYNC */
+	RCAR_GP_PIN(4, 28), RCAR_GP_PIN(4, 27),
+};
+static const unsigned int du1_sync_mux[] = {
+	DU1_EXVSYNC_DU1_VSYNC_MARK, DU1_EXHSYNC_DU1_HSYNC_MARK
+};
+static const unsigned int du1_oddf_pins[] = {
+	/* EXODDF/ODDF/DISP/CDE */
+	RCAR_GP_PIN(4, 29),
+};
+static const unsigned int du1_oddf_mux[] = {
+	DU1_EXODDF_DU1_ODDF_DISP_CDE_MARK,
+};
+static const unsigned int du1_cde_pins[] = {
+	/* CDE */
+	RCAR_GP_PIN(4, 31),
+};
+static const unsigned int du1_cde_mux[] = {
+	DU1_CDE_MARK
+};
+static const unsigned int du1_disp_pins[] = {
+	/* DISP */
+	RCAR_GP_PIN(4, 30),
+};
+static const unsigned int du1_disp_mux[] = {
+	DU1_DISP_MARK
+};
 /* - ETH -------------------------------------------------------------------- */
 static const unsigned int eth_link_pins[] = {
 	/* LINK */
@@ -3364,6 +3537,24 @@
 	SH_PFC_PIN_GROUP(avb_avtp_match),
 	SH_PFC_PIN_GROUP(avb_avtp_capture_b),
 	SH_PFC_PIN_GROUP(avb_avtp_match_b),
+	SH_PFC_PIN_GROUP(du0_rgb666),
+	SH_PFC_PIN_GROUP(du0_rgb888),
+	SH_PFC_PIN_GROUP(du0_clk0_out),
+	SH_PFC_PIN_GROUP(du0_clk1_out),
+	SH_PFC_PIN_GROUP(du0_clk_in),
+	SH_PFC_PIN_GROUP(du0_sync),
+	SH_PFC_PIN_GROUP(du0_oddf),
+	SH_PFC_PIN_GROUP(du0_cde),
+	SH_PFC_PIN_GROUP(du0_disp),
+	SH_PFC_PIN_GROUP(du1_rgb666),
+	SH_PFC_PIN_GROUP(du1_rgb888),
+	SH_PFC_PIN_GROUP(du1_clk0_out),
+	SH_PFC_PIN_GROUP(du1_clk1_out),
+	SH_PFC_PIN_GROUP(du1_clk_in),
+	SH_PFC_PIN_GROUP(du1_sync),
+	SH_PFC_PIN_GROUP(du1_oddf),
+	SH_PFC_PIN_GROUP(du1_cde),
+	SH_PFC_PIN_GROUP(du1_disp),
 	SH_PFC_PIN_GROUP(eth_link),
 	SH_PFC_PIN_GROUP(eth_magic),
 	SH_PFC_PIN_GROUP(eth_mdio),
@@ -3622,6 +3813,30 @@
 	"avb_avtp_match_b",
 };
 
+static const char * const du0_groups[] = {
+	"du0_rgb666",
+	"du0_rgb888",
+	"du0_clk0_out",
+	"du0_clk1_out",
+	"du0_clk_in",
+	"du0_sync",
+	"du0_oddf",
+	"du0_cde",
+	"du0_disp",
+};
+
+static const char * const du1_groups[] = {
+	"du1_rgb666",
+	"du1_rgb888",
+	"du1_clk0_out",
+	"du1_clk1_out",
+	"du1_clk_in",
+	"du1_sync",
+	"du1_oddf",
+	"du1_cde",
+	"du1_disp",
+};
+
 static const char * const eth_groups[] = {
 	"eth_link",
 	"eth_magic",
@@ -3969,6 +4184,8 @@
 static const struct sh_pfc_function pinmux_functions[] = {
 	SH_PFC_FUNCTION(audio_clk),
 	SH_PFC_FUNCTION(avb),
+	SH_PFC_FUNCTION(du0),
+	SH_PFC_FUNCTION(du1),
 	SH_PFC_FUNCTION(eth),
 	SH_PFC_FUNCTION(hscif0),
 	SH_PFC_FUNCTION(hscif1),
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c
index 5979dab..44632b1 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c
@@ -14,14 +14,14 @@
 #include "sh_pfc.h"
 
 #define CPU_ALL_PORT(fn, sfx)						\
-	PORT_GP_16(0, fn, sfx),						\
-	PORT_GP_28(1, fn, sfx),						\
-	PORT_GP_15(2, fn, sfx),						\
-	PORT_GP_16(3, fn, sfx),						\
-	PORT_GP_18(4, fn, sfx),						\
-	PORT_GP_26(5, fn, sfx),						\
-	PORT_GP_32(6, fn, sfx),						\
-	PORT_GP_4(7, fn, sfx)
+	PORT_GP_CFG_16(0, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH),	\
+	PORT_GP_CFG_28(1, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH),	\
+	PORT_GP_CFG_15(2, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH),	\
+	PORT_GP_CFG_16(3, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH),	\
+	PORT_GP_CFG_18(4, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH),	\
+	PORT_GP_CFG_26(5, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH),	\
+	PORT_GP_CFG_32(6, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH),	\
+	PORT_GP_CFG_4(7, fn, sfx, SH_PFC_PIN_CFG_DRIVE_STRENGTH)
 /*
  * F_() : just information
  * FM() : macro for FN_xxx / xxx_MARK
@@ -4564,6 +4564,207 @@
 	{ },
 };
 
+static const struct pinmux_drive_reg pinmux_drive_regs[] = {
+	{ PINMUX_DRIVE_REG("DRVCTRL3", 0xe606030c) {
+		{ RCAR_GP_PIN(2,  9),  8, 3 },	/* AVB_MDC */
+		{ RCAR_GP_PIN(2, 10),  4, 3 },	/* AVB_MAGIC */
+		{ RCAR_GP_PIN(2, 11),  0, 3 },	/* AVB_PHY_INT */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL4", 0xe6060310) {
+		{ RCAR_GP_PIN(2, 12), 28, 3 },	/* AVB_LINK */
+		{ RCAR_GP_PIN(2, 13), 24, 3 },	/* AVB_AVTP_MATCH */
+		{ RCAR_GP_PIN(2, 14), 20, 3 },	/* AVB_AVTP_CAPTURE */
+		{ RCAR_GP_PIN(2,  0), 16, 3 },	/* IRQ0 */
+		{ RCAR_GP_PIN(2,  1), 12, 3 },	/* IRQ1 */
+		{ RCAR_GP_PIN(2,  2),  8, 3 },	/* IRQ2 */
+		{ RCAR_GP_PIN(2,  3),  4, 3 },	/* IRQ3 */
+		{ RCAR_GP_PIN(2,  4),  0, 3 },	/* IRQ4 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL5", 0xe6060314) {
+		{ RCAR_GP_PIN(2,  5), 28, 3 },	/* IRQ5 */
+		{ RCAR_GP_PIN(2,  6), 24, 3 },	/* PWM0 */
+		{ RCAR_GP_PIN(2,  7), 20, 3 },	/* PWM1 */
+		{ RCAR_GP_PIN(2,  8), 16, 3 },	/* PWM2 */
+		{ RCAR_GP_PIN(1,  0), 12, 3 },	/* A0 */
+		{ RCAR_GP_PIN(1,  1),  8, 3 },	/* A1 */
+		{ RCAR_GP_PIN(1,  2),  4, 3 },	/* A2 */
+		{ RCAR_GP_PIN(1,  3),  0, 3 },	/* A3 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL6", 0xe6060318) {
+		{ RCAR_GP_PIN(1,  4), 28, 3 },	/* A4 */
+		{ RCAR_GP_PIN(1,  5), 24, 3 },	/* A5 */
+		{ RCAR_GP_PIN(1,  6), 20, 3 },	/* A6 */
+		{ RCAR_GP_PIN(1,  7), 16, 3 },	/* A7 */
+		{ RCAR_GP_PIN(1,  8), 12, 3 },	/* A8 */
+		{ RCAR_GP_PIN(1,  9),  8, 3 },	/* A9 */
+		{ RCAR_GP_PIN(1, 10),  4, 3 },	/* A10 */
+		{ RCAR_GP_PIN(1, 11),  0, 3 },	/* A11 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL7", 0xe606031c) {
+		{ RCAR_GP_PIN(1, 12), 28, 3 },	/* A12 */
+		{ RCAR_GP_PIN(1, 13), 24, 3 },	/* A13 */
+		{ RCAR_GP_PIN(1, 14), 20, 3 },	/* A14 */
+		{ RCAR_GP_PIN(1, 15), 16, 3 },	/* A15 */
+		{ RCAR_GP_PIN(1, 16), 12, 3 },	/* A16 */
+		{ RCAR_GP_PIN(1, 17),  8, 3 },	/* A17 */
+		{ RCAR_GP_PIN(1, 18),  4, 3 },	/* A18 */
+		{ RCAR_GP_PIN(1, 19),  0, 3 },	/* A19 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL8", 0xe6060320) {
+		{ RCAR_GP_PIN(1, 20), 24, 3 },	/* CS0 */
+		{ RCAR_GP_PIN(1, 21), 20, 3 },	/* CS1_A26 */
+		{ RCAR_GP_PIN(1, 22), 16, 3 },	/* BS */
+		{ RCAR_GP_PIN(1, 23), 12, 3 },	/* RD */
+		{ RCAR_GP_PIN(1, 24),  8, 3 },	/* RD_WR */
+		{ RCAR_GP_PIN(1, 25),  4, 3 },	/* WE0 */
+		{ RCAR_GP_PIN(1, 26),  0, 3 },	/* WE1 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL9", 0xe6060324) {
+		{ RCAR_GP_PIN(1, 27), 28, 3 },	/* EX_WAIT0 */
+		{ RCAR_GP_PIN(0,  0), 20, 3 },	/* D0 */
+		{ RCAR_GP_PIN(0,  1), 16, 3 },	/* D1 */
+		{ RCAR_GP_PIN(0,  2), 12, 3 },	/* D2 */
+		{ RCAR_GP_PIN(0,  3),  8, 3 },	/* D3 */
+		{ RCAR_GP_PIN(0,  4),  4, 3 },	/* D4 */
+		{ RCAR_GP_PIN(0,  5),  0, 3 },	/* D5 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL10", 0xe6060328) {
+		{ RCAR_GP_PIN(0,  6), 28, 3 },	/* D6 */
+		{ RCAR_GP_PIN(0,  7), 24, 3 },	/* D7 */
+		{ RCAR_GP_PIN(0,  8), 20, 3 },	/* D8 */
+		{ RCAR_GP_PIN(0,  9), 16, 3 },	/* D9 */
+		{ RCAR_GP_PIN(0, 10), 12, 3 },	/* D10 */
+		{ RCAR_GP_PIN(0, 11),  8, 3 },	/* D11 */
+		{ RCAR_GP_PIN(0, 12),  4, 3 },	/* D12 */
+		{ RCAR_GP_PIN(0, 13),  0, 3 },	/* D13 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL11", 0xe606032c) {
+		{ RCAR_GP_PIN(0, 14), 28, 3 },	/* D14 */
+		{ RCAR_GP_PIN(0, 15), 24, 3 },	/* D15 */
+		{ RCAR_GP_PIN(7,  0), 20, 3 },	/* AVS1 */
+		{ RCAR_GP_PIN(7,  1), 16, 3 },	/* AVS2 */
+		{ RCAR_GP_PIN(7,  2), 12, 3 },	/* HDMI0_CEC */
+		{ RCAR_GP_PIN(7,  3),  8, 3 },	/* HDMI1_CEC */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL13", 0xe6060334) {
+		{ RCAR_GP_PIN(3,  0), 20, 3 },	/* SD0_CLK */
+		{ RCAR_GP_PIN(3,  1), 16, 3 },	/* SD0_CMD */
+		{ RCAR_GP_PIN(3,  2), 12, 3 },	/* SD0_DAT0 */
+		{ RCAR_GP_PIN(3,  3),  8, 3 },	/* SD0_DAT1 */
+		{ RCAR_GP_PIN(3,  4),  4, 3 },	/* SD0_DAT2 */
+		{ RCAR_GP_PIN(3,  5),  0, 3 },	/* SD0_DAT3 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL14", 0xe6060338) {
+		{ RCAR_GP_PIN(3,  6), 28, 3 },	/* SD1_CLK */
+		{ RCAR_GP_PIN(3,  7), 24, 3 },	/* SD1_CMD */
+		{ RCAR_GP_PIN(3,  8), 20, 3 },	/* SD1_DAT0 */
+		{ RCAR_GP_PIN(3,  9), 16, 3 },	/* SD1_DAT1 */
+		{ RCAR_GP_PIN(3, 10), 12, 3 },	/* SD1_DAT2 */
+		{ RCAR_GP_PIN(3, 11),  8, 3 },	/* SD1_DAT3 */
+		{ RCAR_GP_PIN(4,  0),  4, 3 },	/* SD2_CLK */
+		{ RCAR_GP_PIN(4,  1),  0, 3 },	/* SD2_CMD */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL15", 0xe606033c) {
+		{ RCAR_GP_PIN(4,  2), 28, 3 },	/* SD2_DAT0 */
+		{ RCAR_GP_PIN(4,  3), 24, 3 },	/* SD2_DAT1 */
+		{ RCAR_GP_PIN(4,  4), 20, 3 },	/* SD2_DAT2 */
+		{ RCAR_GP_PIN(4,  5), 16, 3 },	/* SD2_DAT3 */
+		{ RCAR_GP_PIN(4,  6), 12, 3 },	/* SD2_DS */
+		{ RCAR_GP_PIN(4,  7),  8, 3 },	/* SD3_CLK */
+		{ RCAR_GP_PIN(4,  8),  4, 3 },	/* SD3_CMD */
+		{ RCAR_GP_PIN(4,  9),  0, 3 },	/* SD3_DAT0 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL16", 0xe6060340) {
+		{ RCAR_GP_PIN(4, 10), 28, 3 },	/* SD3_DAT1 */
+		{ RCAR_GP_PIN(4, 11), 24, 3 },	/* SD3_DAT2 */
+		{ RCAR_GP_PIN(4, 12), 20, 3 },	/* SD3_DAT3 */
+		{ RCAR_GP_PIN(4, 13), 16, 3 },	/* SD3_DAT4 */
+		{ RCAR_GP_PIN(4, 14), 12, 3 },	/* SD3_DAT5 */
+		{ RCAR_GP_PIN(4, 15),  8, 3 },	/* SD3_DAT6 */
+		{ RCAR_GP_PIN(4, 16),  4, 3 },	/* SD3_DAT7 */
+		{ RCAR_GP_PIN(4, 17),  0, 3 },	/* SD3_DS */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL17", 0xe6060344) {
+		{ RCAR_GP_PIN(3, 12), 28, 3 },	/* SD0_CD */
+		{ RCAR_GP_PIN(3, 13), 24, 3 },	/* SD0_WP */
+		{ RCAR_GP_PIN(3, 14), 20, 3 },	/* SD1_CD */
+		{ RCAR_GP_PIN(3, 15), 16, 3 },	/* SD1_WP */
+		{ RCAR_GP_PIN(5,  0), 12, 3 },	/* SCK0 */
+		{ RCAR_GP_PIN(5,  1),  8, 3 },	/* RX0 */
+		{ RCAR_GP_PIN(5,  2),  4, 3 },	/* TX0 */
+		{ RCAR_GP_PIN(5,  3),  0, 3 },	/* CTS0 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL18", 0xe6060348) {
+		{ RCAR_GP_PIN(5,  4), 28, 3 },	/* RTS0_TANS */
+		{ RCAR_GP_PIN(5,  5), 24, 3 },	/* RX1 */
+		{ RCAR_GP_PIN(5,  6), 20, 3 },	/* TX1 */
+		{ RCAR_GP_PIN(5,  7), 16, 3 },	/* CTS1 */
+		{ RCAR_GP_PIN(5,  8), 12, 3 },	/* RTS1_TANS */
+		{ RCAR_GP_PIN(5,  9),  8, 3 },	/* SCK2 */
+		{ RCAR_GP_PIN(5, 10),  4, 3 },	/* TX2 */
+		{ RCAR_GP_PIN(5, 11),  0, 3 },	/* RX2 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL19", 0xe606034c) {
+		{ RCAR_GP_PIN(5, 12), 28, 3 },	/* HSCK0 */
+		{ RCAR_GP_PIN(5, 13), 24, 3 },	/* HRX0 */
+		{ RCAR_GP_PIN(5, 14), 20, 3 },	/* HTX0 */
+		{ RCAR_GP_PIN(5, 15), 16, 3 },	/* HCTS0 */
+		{ RCAR_GP_PIN(5, 16), 12, 3 },	/* HRTS0 */
+		{ RCAR_GP_PIN(5, 17),  8, 3 },	/* MSIOF0_SCK */
+		{ RCAR_GP_PIN(5, 18),  4, 3 },	/* MSIOF0_SYNC */
+		{ RCAR_GP_PIN(5, 19),  0, 3 },	/* MSIOF0_SS1 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL20", 0xe6060350) {
+		{ RCAR_GP_PIN(5, 20), 28, 3 },	/* MSIOF0_TXD */
+		{ RCAR_GP_PIN(5, 21), 24, 3 },	/* MSIOF0_SS2 */
+		{ RCAR_GP_PIN(5, 22), 20, 3 },	/* MSIOF0_RXD */
+		{ RCAR_GP_PIN(5, 23), 16, 3 },	/* MLB_CLK */
+		{ RCAR_GP_PIN(5, 24), 12, 3 },	/* MLB_SIG */
+		{ RCAR_GP_PIN(5, 25),  8, 3 },	/* MLB_DAT */
+		{ RCAR_GP_PIN(6,  0),  0, 3 },	/* SSI_SCK01239 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL21", 0xe6060354) {
+		{ RCAR_GP_PIN(6,  1), 28, 3 },	/* SSI_WS01239 */
+		{ RCAR_GP_PIN(6,  2), 24, 3 },	/* SSI_SDATA0 */
+		{ RCAR_GP_PIN(6,  3), 20, 3 },	/* SSI_SDATA1 */
+		{ RCAR_GP_PIN(6,  4), 16, 3 },	/* SSI_SDATA2 */
+		{ RCAR_GP_PIN(6,  5), 12, 3 },	/* SSI_SCK34 */
+		{ RCAR_GP_PIN(6,  6),  8, 3 },	/* SSI_WS34 */
+		{ RCAR_GP_PIN(6,  7),  4, 3 },	/* SSI_SDATA3 */
+		{ RCAR_GP_PIN(6,  8),  0, 3 },	/* SSI_SCK4 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL22", 0xe6060358) {
+		{ RCAR_GP_PIN(6,  9), 28, 3 },	/* SSI_WS4 */
+		{ RCAR_GP_PIN(6, 10), 24, 3 },	/* SSI_SDATA4 */
+		{ RCAR_GP_PIN(6, 11), 20, 3 },	/* SSI_SCK5 */
+		{ RCAR_GP_PIN(6, 12), 16, 3 },	/* SSI_WS5 */
+		{ RCAR_GP_PIN(6, 13), 12, 3 },	/* SSI_SDATA5 */
+		{ RCAR_GP_PIN(6, 14),  8, 3 },	/* SSI_SCK6 */
+		{ RCAR_GP_PIN(6, 15),  4, 3 },	/* SSI_WS6 */
+		{ RCAR_GP_PIN(6, 16),  0, 3 },	/* SSI_SDATA6 */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL23", 0xe606035c) {
+		{ RCAR_GP_PIN(6, 17), 28, 3 },	/* SSI_SCK78 */
+		{ RCAR_GP_PIN(6, 18), 24, 3 },	/* SSI_WS78 */
+		{ RCAR_GP_PIN(6, 19), 20, 3 },	/* SSI_SDATA7 */
+		{ RCAR_GP_PIN(6, 20), 16, 3 },	/* SSI_SDATA8 */
+		{ RCAR_GP_PIN(6, 21), 12, 3 },	/* SSI_SDATA9 */
+		{ RCAR_GP_PIN(6, 22),  8, 3 },	/* AUDIO_CLKA */
+		{ RCAR_GP_PIN(6, 23),  4, 3 },	/* AUDIO_CLKB */
+		{ RCAR_GP_PIN(6, 24),  0, 3 },	/* USB0_PWEN */
+	} },
+	{ PINMUX_DRIVE_REG("DRVCTRL24", 0xe6060360) {
+		{ RCAR_GP_PIN(6, 25), 28, 3 },	/* USB0_OVC */
+		{ RCAR_GP_PIN(6, 26), 24, 3 },	/* USB1_PWEN */
+		{ RCAR_GP_PIN(6, 27), 20, 3 },	/* USB1_OVC */
+		{ RCAR_GP_PIN(6, 28), 16, 3 },	/* USB30_PWEN */
+		{ RCAR_GP_PIN(6, 29), 12, 3 },	/* USB30_OVC */
+		{ RCAR_GP_PIN(6, 30),  8, 3 },	/* USB31_PWEN */
+		{ RCAR_GP_PIN(6, 31),  4, 3 },	/* USB31_OVC */
+	} },
+	{ },
+};
+
 const struct sh_pfc_soc_info r8a7795_pinmux_info = {
 	.name = "r8a77950_pfc",
 	.unlock_reg = 0xe6060000, /* PMMR */
@@ -4578,6 +4779,7 @@
 	.nr_functions = ARRAY_SIZE(pinmux_functions),
 
 	.cfg_regs = pinmux_config_regs,
+	.drive_regs = pinmux_drive_regs,
 
 	.pinmux_data = pinmux_data,
 	.pinmux_data_size = ARRAY_SIZE(pinmux_data),
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index 87b0a59..fdb445d 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -476,6 +476,91 @@
 	.gpio_set_direction	= sh_pfc_gpio_set_direction,
 };
 
+/* Find the drive register and bit field (offset, size) that control @pin.
+ * Returns the register address, or 0 when no table entry describes the pin.
+ */
+static u32 sh_pfc_pinconf_find_drive_strength_reg(struct sh_pfc *pfc,
+		unsigned int pin, unsigned int *offset, unsigned int *size)
+{
+	const struct pinmux_drive_reg *drv = pfc->info->drive_regs;
+	const struct pinmux_drive_reg_field *f;
+	unsigned int idx;
+
+	for (; drv->reg; ++drv) {	/* a zero address ends the table */
+		for (idx = 0; idx < ARRAY_SIZE(drv->fields); ++idx) {
+			f = &drv->fields[idx];
+			if (!f->size || f->pin != pin)
+				continue;	/* unused slot or another pin */
+			*offset = f->offset;
+			*size = f->size;
+			return drv->reg;
+		}
+	}
+	return 0;
+}
+
+/* Read back the drive strength of @pin in mA, or -EINVAL if the pin has no
+ * drive strength field. Full strength is taken to be 24mA (fixed for now).
+ */
+static int sh_pfc_pinconf_get_drive_strength(struct sh_pfc *pfc,
+					     unsigned int pin)
+{
+	unsigned int offset, size, step;
+	unsigned long flags;
+	u32 reg, field;
+
+	reg = sh_pfc_pinconf_find_drive_strength_reg(pfc, pin, &offset, &size);
+	if (reg == 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pfc->lock, flags);
+	field = sh_pfc_read_reg(pfc, reg, 32) >> offset;
+	spin_unlock_irqrestore(&pfc->lock, flags);
+
+	field &= GENMASK(size - 1, 0);
+
+	/* 2-bit fields count in 6mA steps, all other widths in 3mA steps. */
+	step = (size == 2) ? 6 : 3;
+	return (field + 1) * step;
+}
+
+/* Program the drive strength of @pin to @strength mA (rounded down to the
+ * field's step). Returns 0, or -EINVAL for an unknown pin or bad strength.
+ */
+static int sh_pfc_pinconf_set_drive_strength(struct sh_pfc *pfc,
+		unsigned int pin, unsigned int strength)
+{
+	unsigned long flags;
+	unsigned int offset;
+	unsigned int size;
+	unsigned int step;
+	u32 reg;
+	u32 val;
+
+	reg = sh_pfc_pinconf_find_drive_strength_reg(pfc, pin, &offset, &size);
+	if (!reg)
+		return -EINVAL;
+
+	/* @strength is unsigned int, not u16, so that an oversized pinconf
+	 * argument is rejected here instead of being truncated into range.
+	 */
+	step = size == 2 ? 6 : 3;
+	if (strength < step || strength > 24)
+		return -EINVAL;
+
+	/* Convert from mA, assuming a full drive strength value of 24mA. */
+	strength = strength / step - 1;
+
+	spin_lock_irqsave(&pfc->lock, flags);
+	val = sh_pfc_read_reg(pfc, reg, 32);
+	val &= ~GENMASK(offset + size - 1, offset);
+	val |= strength << offset;
+	sh_pfc_write_reg(pfc, reg, 32, val);
+	spin_unlock_irqrestore(&pfc->lock, flags);
+
+	return 0;
+}
+
 /* Check whether the requested parameter is supported for a pin. */
 static bool sh_pfc_pinconf_validate(struct sh_pfc *pfc, unsigned int _pin,
 				    enum pin_config_param param)
@@ -493,6 +578,9 @@
 	case PIN_CONFIG_BIAS_PULL_DOWN:
 		return pin->configs & SH_PFC_PIN_CFG_PULL_DOWN;
 
+	case PIN_CONFIG_DRIVE_STRENGTH:
+		return pin->configs & SH_PFC_PIN_CFG_DRIVE_STRENGTH;
+
 	case PIN_CONFIG_POWER_SOURCE:
 		return pin->configs & SH_PFC_PIN_CFG_IO_VOLTAGE;
 
@@ -532,6 +620,17 @@
 		break;
 	}
 
+	case PIN_CONFIG_DRIVE_STRENGTH: {
+		int ret;
+
+		ret = sh_pfc_pinconf_get_drive_strength(pfc, _pin);
+		if (ret < 0)
+			return ret;
+
+		*config = ret;
+		break;
+	}
+
 	case PIN_CONFIG_POWER_SOURCE: {
 		int ret;
 
@@ -584,6 +683,18 @@
 
 			break;
 
+		case PIN_CONFIG_DRIVE_STRENGTH: {
+			unsigned int arg =
+				pinconf_to_config_argument(configs[i]);
+			int ret;
+
+			ret = sh_pfc_pinconf_set_drive_strength(pfc, _pin, arg);
+			if (ret < 0)
+				return ret;
+
+			break;
+		}
+
 		case PIN_CONFIG_POWER_SOURCE: {
 			unsigned int arg =
 				pinconf_to_config_argument(configs[i]);
@@ -678,7 +789,6 @@
 		return -ENOMEM;
 
 	pmx->pfc = pfc;
-	pfc->pinctrl = pmx;
 
 	ret = sh_pfc_map_pins(pfc, pmx);
 	if (ret < 0)
@@ -692,19 +802,9 @@
 	pmx->pctl_desc.pins = pmx->pins;
 	pmx->pctl_desc.npins = pfc->info->nr_pins;
 
-	pmx->pctl = pinctrl_register(&pmx->pctl_desc, pfc->dev, pmx);
+	pmx->pctl = devm_pinctrl_register(pfc->dev, &pmx->pctl_desc, pmx);
 	if (IS_ERR(pmx->pctl))
 		return PTR_ERR(pmx->pctl);
 
 	return 0;
 }
-
-int sh_pfc_unregister_pinctrl(struct sh_pfc *pfc)
-{
-	struct sh_pfc_pinctrl *pmx = pfc->pinctrl;
-
-	pinctrl_unregister(pmx->pctl);
-
-	pfc->pinctrl = NULL;
-	return 0;
-}
diff --git a/drivers/pinctrl/sh-pfc/sh_pfc.h b/drivers/pinctrl/sh-pfc/sh_pfc.h
index a490834..656ea32 100644
--- a/drivers/pinctrl/sh-pfc/sh_pfc.h
+++ b/drivers/pinctrl/sh-pfc/sh_pfc.h
@@ -28,6 +28,7 @@
 #define SH_PFC_PIN_CFG_PULL_UP		(1 << 2)
 #define SH_PFC_PIN_CFG_PULL_DOWN	(1 << 3)
 #define SH_PFC_PIN_CFG_IO_VOLTAGE	(1 << 4)
+#define SH_PFC_PIN_CFG_DRIVE_STRENGTH	(1 << 5)
 #define SH_PFC_PIN_CFG_NO_GPIO		(1 << 31)
 
 struct sh_pfc_pin {
@@ -131,6 +132,21 @@
 		{ var_fw0, var_fwn, 0 }, \
 	.enum_ids = (const u16 [])
 
+struct pinmux_drive_reg_field {
+	u16 pin;	/* pin number this field controls */
+	u8 offset;	/* bit position of the field's LSB in the register */
+	u8 size;	/* field width in bits; 0 marks an unused slot */
+};
+
+struct pinmux_drive_reg {
+	u32 reg;	/* register address; 0 terminates a table */
+	const struct pinmux_drive_reg_field fields[8];	/* up to 8 per register */
+};
+
+#define PINMUX_DRIVE_REG(name, r) \
+	.reg = r, \
+	.fields =
+
 struct pinmux_data_reg {
 	u32 reg;
 	u8 reg_width;
@@ -199,6 +215,7 @@
 #endif
 
 	const struct pinmux_cfg_reg *cfg_regs;
+	const struct pinmux_drive_reg *drive_regs;
 	const struct pinmux_data_reg *data_regs;
 
 	const u16 *pinmux_data;
@@ -276,7 +293,7 @@
  *   - msel: Module selector
  */
 #define PINMUX_IPSR_MSEL(ipsr, fn, msel)				\
-	PINMUX_DATA(fn##_MARK, FN_##msel, FN_##ipsr, FN_##fn)
+	PINMUX_DATA(fn##_MARK, FN_##msel, FN_##fn, FN_##ipsr)
 
 /*
  * Describe a pinmux configuration for a single-function pin with GPIO
diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c
index 0afaf79..4db52ba 100644
--- a/drivers/pinctrl/spear/pinctrl-spear.c
+++ b/drivers/pinctrl/spear/pinctrl-spear.c
@@ -395,7 +395,7 @@
 	spear_pinctrl_desc.pins = machdata->pins;
 	spear_pinctrl_desc.npins = machdata->npins;
 
-	pmx->pctl = pinctrl_register(&spear_pinctrl_desc, &pdev->dev, pmx);
+	pmx->pctl = devm_pinctrl_register(&pdev->dev, &spear_pinctrl_desc, pmx);
 	if (IS_ERR(pmx->pctl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(pmx->pctl);
@@ -403,12 +403,3 @@
 
 	return 0;
 }
-
-int spear_pinctrl_remove(struct platform_device *pdev)
-{
-	struct spear_pmx *pmx = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pmx->pctl);
-
-	return 0;
-}
diff --git a/drivers/pinctrl/spear/pinctrl-spear.h b/drivers/pinctrl/spear/pinctrl-spear.h
index 27c2cc8..aa5cf70 100644
--- a/drivers/pinctrl/spear/pinctrl-spear.h
+++ b/drivers/pinctrl/spear/pinctrl-spear.h
@@ -197,7 +197,6 @@
 				 unsigned count, u16 reg);
 int spear_pinctrl_probe(struct platform_device *pdev,
 			struct spear_pinctrl_machdata *machdata);
-int spear_pinctrl_remove(struct platform_device *pdev);
 
 #define SPEAR_PIN_0_TO_101		\
 	PINCTRL_PIN(0, "PLGPIO0"),	\
diff --git a/drivers/pinctrl/spear/pinctrl-spear1310.c b/drivers/pinctrl/spear/pinctrl-spear1310.c
index 92611bb..1821068 100644
--- a/drivers/pinctrl/spear/pinctrl-spear1310.c
+++ b/drivers/pinctrl/spear/pinctrl-spear1310.c
@@ -2704,18 +2704,12 @@
 	return spear_pinctrl_probe(pdev, &spear1310_machdata);
 }
 
-static int spear1310_pinctrl_remove(struct platform_device *pdev)
-{
-	return spear_pinctrl_remove(pdev);
-}
-
 static struct platform_driver spear1310_pinctrl_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = spear1310_pinctrl_of_match,
 	},
 	.probe = spear1310_pinctrl_probe,
-	.remove = spear1310_pinctrl_remove,
 };
 
 static int __init spear1310_pinctrl_init(void)
diff --git a/drivers/pinctrl/spear/pinctrl-spear1340.c b/drivers/pinctrl/spear/pinctrl-spear1340.c
index f842e9d..c01fb23 100644
--- a/drivers/pinctrl/spear/pinctrl-spear1340.c
+++ b/drivers/pinctrl/spear/pinctrl-spear1340.c
@@ -2020,18 +2020,12 @@
 	return spear_pinctrl_probe(pdev, &spear1340_machdata);
 }
 
-static int spear1340_pinctrl_remove(struct platform_device *pdev)
-{
-	return spear_pinctrl_remove(pdev);
-}
-
 static struct platform_driver spear1340_pinctrl_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = spear1340_pinctrl_of_match,
 	},
 	.probe = spear1340_pinctrl_probe,
-	.remove = spear1340_pinctrl_remove,
 };
 
 static int __init spear1340_pinctrl_init(void)
diff --git a/drivers/pinctrl/spear/pinctrl-spear300.c b/drivers/pinctrl/spear/pinctrl-spear300.c
index d998a2c..111148d 100644
--- a/drivers/pinctrl/spear/pinctrl-spear300.c
+++ b/drivers/pinctrl/spear/pinctrl-spear300.c
@@ -677,18 +677,12 @@
 	return 0;
 }
 
-static int spear300_pinctrl_remove(struct platform_device *pdev)
-{
-	return spear_pinctrl_remove(pdev);
-}
-
 static struct platform_driver spear300_pinctrl_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = spear300_pinctrl_of_match,
 	},
 	.probe = spear300_pinctrl_probe,
-	.remove = spear300_pinctrl_remove,
 };
 
 static int __init spear300_pinctrl_init(void)
diff --git a/drivers/pinctrl/spear/pinctrl-spear310.c b/drivers/pinctrl/spear/pinctrl-spear310.c
index 609b18a..a7b0000 100644
--- a/drivers/pinctrl/spear/pinctrl-spear310.c
+++ b/drivers/pinctrl/spear/pinctrl-spear310.c
@@ -400,18 +400,12 @@
 	return 0;
 }
 
-static int spear310_pinctrl_remove(struct platform_device *pdev)
-{
-	return spear_pinctrl_remove(pdev);
-}
-
 static struct platform_driver spear310_pinctrl_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = spear310_pinctrl_of_match,
 	},
 	.probe = spear310_pinctrl_probe,
-	.remove = spear310_pinctrl_remove,
 };
 
 static int __init spear310_pinctrl_init(void)
diff --git a/drivers/pinctrl/spear/pinctrl-spear320.c b/drivers/pinctrl/spear/pinctrl-spear320.c
index c071144..e2b3817 100644
--- a/drivers/pinctrl/spear/pinctrl-spear320.c
+++ b/drivers/pinctrl/spear/pinctrl-spear320.c
@@ -3441,18 +3441,12 @@
 	return 0;
 }
 
-static int spear320_pinctrl_remove(struct platform_device *pdev)
-{
-	return spear_pinctrl_remove(pdev);
-}
-
 static struct platform_driver spear320_pinctrl_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = spear320_pinctrl_of_match,
 	},
 	.probe = spear320_pinctrl_probe,
-	.remove = spear320_pinctrl_remove,
 };
 
 static int __init spear320_pinctrl_init(void)
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index 8deb566..ae9fab8 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -358,7 +358,7 @@
 		ret = stm32_pctrl_dt_subnode_to_map(pctldev, np, map,
 				&reserved_maps, num_maps);
 		if (ret < 0) {
-			pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+			pinctrl_utils_free_map(pctldev, *map, *num_maps);
 			return ret;
 		}
 	}
@@ -396,7 +396,7 @@
 
 static const struct pinctrl_ops stm32_pctrl_ops = {
 	.dt_node_to_map		= stm32_pctrl_dt_node_to_map,
-	.dt_free_map		= pinctrl_utils_dt_free_map,
+	.dt_free_map		= pinctrl_utils_free_map,
 	.get_groups_count	= stm32_pctrl_get_groups_count,
 	.get_group_name		= stm32_pctrl_get_group_name,
 	.get_group_pins		= stm32_pctrl_get_group_pins,
@@ -454,6 +454,29 @@
 	clk_disable(bank->clk);
 }
 
+/* Read the mode (2 bits per pin in MODER) and the alternate function number
+ * (4 bits per pin, 8 pins per register starting at AFRL) of @pin.
+ */
+static void stm32_pmx_get_mode(struct stm32_gpio_bank *bank,
+		int pin, u32 *mode, u32 *alt)
+{
+	int afr_reg = STM32_GPIO_AFRL + (pin / 8) * 4;
+	int afr_shift = (pin % 8) * 4;
+	int mode_shift = pin * 2;
+	unsigned long flags;
+	u32 afr, moder;
+
+	clk_enable(bank->clk);
+	spin_lock_irqsave(&bank->lock, flags);
+	afr = readl_relaxed(bank->base + afr_reg);
+	moder = readl_relaxed(bank->base + STM32_GPIO_MODER);
+	spin_unlock_irqrestore(&bank->lock, flags);
+	clk_disable(bank->clk);
+
+	*alt = (afr & GENMASK(afr_shift + 3, afr_shift)) >> afr_shift;
+	*mode = (moder & GENMASK(mode_shift + 1, mode_shift)) >> mode_shift;
+}
+
 static int stm32_pmx_set_mux(struct pinctrl_dev *pctldev,
 			    unsigned function,
 			    unsigned group)
@@ -525,6 +548,24 @@
 	clk_disable(bank->clk);
 }
 
+/* Return the TYPER register bit of pin @offset, as 0 or 1. */
+static u32 stm32_pconf_get_driving(struct stm32_gpio_bank *bank,
+	unsigned int offset)
+{
+	unsigned long flags;
+	u32 typer;
+
+	clk_enable(bank->clk);
+	spin_lock_irqsave(&bank->lock, flags);
+
+	typer = readl_relaxed(bank->base + STM32_GPIO_TYPER);
+
+	spin_unlock_irqrestore(&bank->lock, flags);
+	clk_disable(bank->clk);
+
+	return (typer >> offset) & 1;
+}
+
 static void stm32_pconf_set_speed(struct stm32_gpio_bank *bank,
 	unsigned offset, u32 speed)
 {
@@ -543,6 +584,24 @@
 	clk_disable(bank->clk);
 }
 
+/* Return the 2-bit SPEEDR field of pin @offset, a value in 0..3. */
+static u32 stm32_pconf_get_speed(struct stm32_gpio_bank *bank,
+	unsigned int offset)
+{
+	unsigned long flags;
+	u32 speedr;
+
+	clk_enable(bank->clk);
+	spin_lock_irqsave(&bank->lock, flags);
+
+	speedr = readl_relaxed(bank->base + STM32_GPIO_SPEEDR);
+
+	spin_unlock_irqrestore(&bank->lock, flags);
+	clk_disable(bank->clk);
+
+	return (speedr >> (offset * 2)) & 0x3;
+}
+
 static void stm32_pconf_set_bias(struct stm32_gpio_bank *bank,
 	unsigned offset, u32 bias)
 {
@@ -561,6 +620,57 @@
 	clk_disable(bank->clk);
 }
 
+/* Return the 2-bit PUPDR field of pin @offset, a value in 0..3. */
+static u32 stm32_pconf_get_bias(struct stm32_gpio_bank *bank,
+	unsigned int offset)
+{
+	unsigned long flags;
+	u32 pupdr;
+
+	clk_enable(bank->clk);
+	spin_lock_irqsave(&bank->lock, flags);
+
+	pupdr = readl_relaxed(bank->base + STM32_GPIO_PUPDR);
+
+	spin_unlock_irqrestore(&bank->lock, flags);
+	clk_disable(bank->clk);
+
+	return (pupdr >> (offset * 2)) & 0x3;
+}
+
+/* Return true when bit @offset of the IDR register is set. */
+static bool stm32_pconf_input_get(struct stm32_gpio_bank *bank,
+	unsigned int offset)
+{
+	unsigned long flags;
+	bool level;
+
+	clk_enable(bank->clk);
+	spin_lock_irqsave(&bank->lock, flags);
+	level = readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset);
+
+	spin_unlock_irqrestore(&bank->lock, flags);
+	clk_disable(bank->clk);
+
+	return level;
+}
+
+/* Return true when bit @offset of the ODR register is set. */
+static bool stm32_pconf_output_get(struct stm32_gpio_bank *bank,
+	unsigned int offset)
+{
+	unsigned long flags;
+	bool level;
+
+	clk_enable(bank->clk);
+	spin_lock_irqsave(&bank->lock, flags);
+	level = readl_relaxed(bank->base + STM32_GPIO_ODR) & BIT(offset);
+	spin_unlock_irqrestore(&bank->lock, flags);
+	clk_disable(bank->clk);
+
+	return level;
+}
+
 static int stm32_pconf_parse_conf(struct pinctrl_dev *pctldev,
 		unsigned int pin, enum pin_config_param param,
 		enum pin_config_param arg)
@@ -634,9 +744,73 @@
 	return 0;
 }
 
+/* pin_config_dbg_show callback: print mode, level, drive, bias, speed of @pin. */
+static void stm32_pconf_dbg_show(struct pinctrl_dev *pctldev,
+				 struct seq_file *s,
+				 unsigned int pin)
+{
+	struct pinctrl_gpio_range *range;
+	struct stm32_gpio_bank *bank;
+	int offset;
+	u32 mode, alt, drive, speed, bias;
+	static const char * const modes[] = {
+			"input", "output", "alternate", "analog" };
+	static const char * const speeds[] = {
+			"low", "medium", "high", "very high" };
+	static const char * const biasing[] = {
+			"floating", "pull up", "pull down", "" };
+	bool val;
+
+	range = pinctrl_find_gpio_range_from_pin_nolock(pctldev, pin);
+	if (!range)	/* pin is in no GPIO range: there is no bank to read */
+		return;
+
+	bank = gpio_range_to_bank(range);
+	offset = stm32_gpio_pin(pin);
+
+	stm32_pmx_get_mode(bank, offset, &mode, &alt);
+	bias = stm32_pconf_get_bias(bank, offset);
+
+	seq_printf(s, "%s ", modes[mode]);
+
+	switch (mode) {
+	case 0:	/* input */
+		val = stm32_pconf_input_get(bank, offset);
+		seq_printf(s, "- %s - %s",
+			   val ? "high" : "low",
+			   biasing[bias]);
+		break;
+
+	case 1:	/* output */
+		drive = stm32_pconf_get_driving(bank, offset);
+		speed = stm32_pconf_get_speed(bank, offset);
+		val = stm32_pconf_output_get(bank, offset);
+		seq_printf(s, "- %s - %s - %s - %s %s",
+			   val ? "high" : "low",
+			   drive ? "open drain" : "push pull",
+			   biasing[bias],
+			   speeds[speed], "speed");
+		break;
+
+	case 2:	/* alternate */
+		drive = stm32_pconf_get_driving(bank, offset);
+		speed = stm32_pconf_get_speed(bank, offset);
+		seq_printf(s, "%d - %s - %s - %s %s", alt,
+			   drive ? "open drain" : "push pull",
+			   biasing[bias],
+			   speeds[speed], "speed");
+		break;
+
+	case 3:	/* analog: nothing beyond the mode name is printed */
+		break;
+	}
+}
+
+
 static const struct pinconf_ops stm32_pconf_ops = {
 	.pin_config_group_get	= stm32_pconf_group_get,
 	.pin_config_group_set	= stm32_pconf_group_set,
+	.pin_config_dbg_show	= stm32_pconf_dbg_show,
 };
 
 static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
@@ -813,10 +987,11 @@
 	pctl->pctl_desc.pmxops = &stm32_pmx_ops;
 	pctl->dev = &pdev->dev;
 
-	pctl->pctl_dev = pinctrl_register(&pctl->pctl_desc, &pdev->dev, pctl);
-	if (!pctl->pctl_dev) {
+	pctl->pctl_dev = devm_pinctrl_register(&pdev->dev, &pctl->pctl_desc,
+					       pctl);
+	if (IS_ERR(pctl->pctl_dev)) {
 		dev_err(&pdev->dev, "Failed pinctrl registration\n");
-		return -EINVAL;
+		return PTR_ERR(pctl->pctl_dev);
 	}
 
 	for (i = 0; i < pctl->nbanks; i++)
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 3b017db..54455af 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -933,18 +933,15 @@
 	pctrl_desc->pctlops = &sunxi_pctrl_ops;
 	pctrl_desc->pmxops =  &sunxi_pmx_ops;
 
-	pctl->pctl_dev = pinctrl_register(pctrl_desc,
-					  &pdev->dev, pctl);
+	pctl->pctl_dev = devm_pinctrl_register(&pdev->dev, pctrl_desc, pctl);
 	if (IS_ERR(pctl->pctl_dev)) {
 		dev_err(&pdev->dev, "couldn't register pinctrl driver\n");
 		return PTR_ERR(pctl->pctl_dev);
 	}
 
 	pctl->chip = devm_kzalloc(&pdev->dev, sizeof(*pctl->chip), GFP_KERNEL);
-	if (!pctl->chip) {
-		ret = -ENOMEM;
-		goto pinctrl_error;
-	}
+	if (!pctl->chip)
+		return -ENOMEM;
 
 	last_pin = pctl->desc->pins[pctl->desc->npins - 1].pin.number;
 	pctl->chip->owner = THIS_MODULE;
@@ -966,7 +963,7 @@
 
 	ret = gpiochip_add_data(pctl->chip, pctl);
 	if (ret)
-		goto pinctrl_error;
+		return ret;
 
 	for (i = 0; i < pctl->desc->npins; i++) {
 		const struct sunxi_desc_pin *pin = pctl->desc->pins + i;
@@ -1044,7 +1041,5 @@
 	clk_disable_unprepare(clk);
 gpiochip_error:
 	gpiochip_remove(pctl->chip);
-pinctrl_error:
-	pinctrl_unregister(pctl->pctl_dev);
 	return ret;
 }
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra-xusb.c b/drivers/pinctrl/tegra/pinctrl-tegra-xusb.c
index 946cda3..6f68a9e 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra-xusb.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra-xusb.c
@@ -267,7 +267,7 @@
 	.get_group_name = tegra_xusb_padctl_get_group_name,
 	.get_group_pins = tegra_xusb_padctl_get_group_pins,
 	.dt_node_to_map = tegra_xusb_padctl_dt_node_to_map,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int tegra_xusb_padctl_get_functions_count(struct pinctrl_dev *pinctrl)
@@ -914,7 +914,8 @@
 	padctl->desc.confops = &tegra_xusb_padctl_pinconf_ops;
 	padctl->desc.owner = THIS_MODULE;
 
-	padctl->pinctrl = pinctrl_register(&padctl->desc, &pdev->dev, padctl);
+	padctl->pinctrl = devm_pinctrl_register(&pdev->dev, &padctl->desc,
+						padctl);
 	if (IS_ERR(padctl->pinctrl)) {
 		dev_err(&pdev->dev, "failed to register pincontrol\n");
 		err = PTR_ERR(padctl->pinctrl);
@@ -924,7 +925,7 @@
 	phy = devm_phy_create(&pdev->dev, NULL, &pcie_phy_ops);
 	if (IS_ERR(phy)) {
 		err = PTR_ERR(phy);
-		goto unregister;
+		goto reset;
 	}
 
 	padctl->phys[TEGRA_XUSB_PADCTL_PCIE] = phy;
@@ -933,7 +934,7 @@
 	phy = devm_phy_create(&pdev->dev, NULL, &sata_phy_ops);
 	if (IS_ERR(phy)) {
 		err = PTR_ERR(phy);
-		goto unregister;
+		goto reset;
 	}
 
 	padctl->phys[TEGRA_XUSB_PADCTL_SATA] = phy;
@@ -944,13 +945,11 @@
 	if (IS_ERR(padctl->provider)) {
 		err = PTR_ERR(padctl->provider);
 		dev_err(&pdev->dev, "failed to register PHYs: %d\n", err);
-		goto unregister;
+		goto reset;
 	}
 
 	return 0;
 
-unregister:
-	pinctrl_unregister(padctl->pinctrl);
 reset:
 	reset_control_assert(padctl->rst);
 	return err;
@@ -962,8 +961,6 @@
 	struct tegra_xusb_padctl *padctl = platform_get_drvdata(pdev);
 	int err;
 
-	pinctrl_unregister(padctl->pinctrl);
-
 	err = reset_control_assert(padctl->rst);
 	if (err < 0)
 		dev_err(&pdev->dev, "failed to assert reset: %d\n", err);
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.c b/drivers/pinctrl/tegra/pinctrl-tegra.c
index 4938882..6e82b29 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra.c
@@ -215,7 +215,7 @@
 		ret = tegra_pinctrl_dt_subnode_to_map(pctldev, np, map,
 						      &reserved_maps, num_maps);
 		if (ret < 0) {
-			pinctrl_utils_dt_free_map(pctldev, *map,
+			pinctrl_utils_free_map(pctldev, *map,
 				*num_maps);
 			of_node_put(np);
 			return ret;
@@ -233,7 +233,7 @@
 	.pin_dbg_show = tegra_pinctrl_pin_dbg_show,
 #endif
 	.dt_node_to_map = tegra_pinctrl_dt_node_to_map,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int tegra_pinctrl_get_funcs_count(struct pinctrl_dev *pctldev)
@@ -417,7 +417,7 @@
 		return -ENOTSUPP;
 	}
 
-	if (*reg < 0 || *bit > 31) {
+	if (*reg < 0 || *bit < 0)  {
 		if (report_err) {
 			const char *prop = "unknown";
 			int i;
@@ -625,6 +625,22 @@
 	.owner = THIS_MODULE,
 };
 
+static void tegra_pinctrl_clear_parked_bits(struct tegra_pmx *pmx)
+{
+	int i = 0;
+	const struct tegra_pingroup *g;
+	u32 val;
+
+	for (i = 0; i < pmx->soc->ngroups; ++i) {
+		if (pmx->soc->groups[i].parked_reg >= 0) {
+			g = &pmx->soc->groups[i];
+			val = pmx_readl(pmx, g->parked_bank, g->parked_reg);
+			val &= ~(1 << g->parked_bit);
+			pmx_writel(pmx, val, g->parked_bank, g->parked_reg);
+		}
+	}
+}
+
 static bool gpio_node_has_range(void)
 {
 	struct device_node *np;
@@ -719,12 +735,14 @@
 			return PTR_ERR(pmx->regs[i]);
 	}
 
-	pmx->pctl = pinctrl_register(&tegra_pinctrl_desc, &pdev->dev, pmx);
+	pmx->pctl = devm_pinctrl_register(&pdev->dev, &tegra_pinctrl_desc, pmx);
 	if (IS_ERR(pmx->pctl)) {
 		dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
 		return PTR_ERR(pmx->pctl);
 	}
 
+	tegra_pinctrl_clear_parked_bits(pmx);
+
 	if (!gpio_node_has_range())
 		pinctrl_add_gpio_range(pmx->pctl, &tegra_pinctrl_gpio_range);
 
@@ -735,13 +753,3 @@
 	return 0;
 }
 EXPORT_SYMBOL_GPL(tegra_pinctrl_probe);
-
-int tegra_pinctrl_remove(struct platform_device *pdev)
-{
-	struct tegra_pmx *pmx = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pmx->pctl);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(tegra_pinctrl_remove);
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.h b/drivers/pinctrl/tegra/pinctrl-tegra.h
index 1615db7..d2ced17 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra.h
+++ b/drivers/pinctrl/tegra/pinctrl-tegra.h
@@ -93,6 +93,9 @@
  * @tri_reg:		Tri-state register offset.
  * @tri_bank:		Tri-state register bank.
  * @tri_bit:		Tri-state register bit.
+ * @parked_reg:		Parked register offset. -1 if unsupported.
+ * @parked_bank:	Parked register bank. 0 if unsupported.
+ * @parked_bit:		Parked register bit. 0 if unsupported.
  * @einput_bit:		Enable-input register bit.
  * @odrain_bit:		Open-drain register bit.
  * @lock_bit:		Lock register bit.
@@ -135,13 +138,16 @@
 	s16 pupd_reg;
 	s16 tri_reg;
 	s16 drv_reg;
+	s16 parked_reg;
 	u32 mux_bank:2;
 	u32 pupd_bank:2;
 	u32 tri_bank:2;
 	u32 drv_bank:2;
+	u32 parked_bank:2;
 	s32 mux_bit:6;
 	s32 pupd_bit:6;
 	s32 tri_bit:6;
+	s32 parked_bit:6;
 	s32 einput_bit:6;
 	s32 odrain_bit:6;
 	s32 lock_bit:6;
@@ -189,6 +195,4 @@
 
 int tegra_pinctrl_probe(struct platform_device *pdev,
 			const struct tegra_pinctrl_soc_data *soc_data);
-int tegra_pinctrl_remove(struct platform_device *pdev);
-
 #endif
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra114.c b/drivers/pinctrl/tegra/pinctrl-tegra114.c
index 05e49d5..4851d16 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra114.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra114.c
@@ -1578,6 +1578,7 @@
 		.lock_bit = 7,						\
 		.ioreset_bit = PINGROUP_BIT_##ior(8),			\
 		.rcv_sel_bit = PINGROUP_BIT_##rcv_sel(9),		\
+		.parked_reg = -1,					\
 		.drv_reg = -1,						\
 	}
 
@@ -1598,6 +1599,7 @@
 		.rcv_sel_bit = -1,					\
 		.drv_reg = DRV_PINGROUP_REG(r),				\
 		.drv_bank = 0,						\
+		.parked_reg = -1,					\
 		.hsm_bit = hsm_b,					\
 		.schmitt_bit = schmitt_b,				\
 		.lpmd_bit = lpmd_b,					\
@@ -1863,7 +1865,6 @@
 		.of_match_table = tegra114_pinctrl_of_match,
 	},
 	.probe = tegra114_pinctrl_probe,
-	.remove = tegra_pinctrl_remove,
 };
 module_platform_driver(tegra114_pinctrl_driver);
 
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra124.c b/drivers/pinctrl/tegra/pinctrl-tegra124.c
index 7cd44c7..a0ce723 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra124.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra124.c
@@ -1747,6 +1747,7 @@
 		.lock_bit = 7,						\
 		.ioreset_bit = PINGROUP_BIT_##ior(8),			\
 		.rcv_sel_bit = PINGROUP_BIT_##rcv_sel(9),		\
+		.parked_reg = -1,					\
 		.drv_reg = -1,						\
 	}
 
@@ -1767,6 +1768,7 @@
 		.rcv_sel_bit = -1,					\
 		.drv_reg = DRV_PINGROUP_REG(r),				\
 		.drv_bank = 0,						\
+		.parked_reg = -1,					\
 		.hsm_bit = hsm_b,					\
 		.schmitt_bit = schmitt_b,				\
 		.lpmd_bit = lpmd_b,					\
@@ -2075,7 +2077,6 @@
 		.of_match_table = tegra124_pinctrl_of_match,
 	},
 	.probe = tegra124_pinctrl_probe,
-	.remove = tegra_pinctrl_remove,
 };
 module_platform_driver(tegra124_pinctrl_driver);
 
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra20.c b/drivers/pinctrl/tegra/pinctrl-tegra20.c
index 4833db4..09bad69 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra20.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra20.c
@@ -1994,6 +1994,7 @@
 		.tri_reg = ((tri_r) - TRISTATE_REG_A),		\
 		.tri_bank = 0,					\
 		.tri_bit = tri_b,				\
+		.parked_reg = -1,				\
 		.einput_bit = -1,				\
 		.odrain_bit = -1,				\
 		.lock_bit = -1,					\
@@ -2013,6 +2014,7 @@
 		.pupd_bank = 2,					\
 		.pupd_bit = pupd_b,				\
 		.drv_reg = -1,					\
+		.parked_reg = -1,				\
 	}
 
 /* Pin groups for drive strength registers (configurable version) */
@@ -2028,6 +2030,7 @@
 		.tri_reg = -1,					\
 		.drv_reg = ((r) - PINGROUP_REG_A),		\
 		.drv_bank = 3,					\
+		.parked_reg = -1,				\
 		.hsm_bit = hsm_b,				\
 		.schmitt_bit = schmitt_b,			\
 		.lpmd_bit = lpmd_b,				\
@@ -2242,7 +2245,6 @@
 		.of_match_table = tegra20_pinctrl_of_match,
 	},
 	.probe = tegra20_pinctrl_probe,
-	.remove = tegra_pinctrl_remove,
 };
 module_platform_driver(tegra20_pinctrl_driver);
 
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra210.c b/drivers/pinctrl/tegra/pinctrl-tegra210.c
index 252b464..2d856af 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra210.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra210.c
@@ -1310,6 +1310,9 @@
 		.lock_bit = 7,						\
 		.ioreset_bit = -1,					\
 		.rcv_sel_bit = PINGROUP_BIT_##e_io_hv(10),		\
+		.parked_reg = PINGROUP_REG(r),				\
+		.parked_bank = 1,					\
+		.parked_bit = 5,					\
 		.hsm_bit = PINGROUP_BIT_##hsm(9),			\
 		.schmitt_bit = 12,					\
 		.drvtype_bit = PINGROUP_BIT_##drvtype(13),		\
@@ -1342,6 +1345,7 @@
 		.rcv_sel_bit = -1,					\
 		.drv_reg = DRV_PINGROUP_REG(r),				\
 		.drv_bank = 0,						\
+		.parked_reg = -1,					\
 		.hsm_bit = -1,						\
 		.schmitt_bit = -1,					\
 		.lpmd_bit = -1,						\
@@ -1579,7 +1583,6 @@
 		.of_match_table = tegra210_pinctrl_of_match,
 	},
 	.probe = tegra210_pinctrl_probe,
-	.remove = tegra_pinctrl_remove,
 };
 module_platform_driver(tegra210_pinctrl_driver);
 
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra30.c b/drivers/pinctrl/tegra/pinctrl-tegra30.c
index 47b2fd8..fb7817f 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra30.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra30.c
@@ -2139,6 +2139,7 @@
 		.lock_bit = 7,						\
 		.ioreset_bit = PINGROUP_BIT_##ior(8),			\
 		.rcv_sel_bit = -1,					\
+		.parked_reg = -1,					\
 		.drv_reg = -1,						\
 	}
 
@@ -2159,6 +2160,7 @@
 		.rcv_sel_bit = -1,					\
 		.drv_reg = DRV_PINGROUP_REG(r),				\
 		.drv_bank = 0,						\
+		.parked_reg = -1,					\
 		.hsm_bit = hsm_b,					\
 		.schmitt_bit = schmitt_b,				\
 		.lpmd_bit = lpmd_b,					\
@@ -2498,7 +2500,6 @@
 		.of_match_table = tegra30_pinctrl_of_match,
 	},
 	.probe = tegra30_pinctrl_probe,
-	.remove = tegra_pinctrl_remove,
 };
 module_platform_driver(tegra30_pinctrl_driver);
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
index 589872c..9674009 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
@@ -115,7 +115,7 @@
 	.pin_dbg_show = uniphier_pctl_pin_dbg_show,
 #endif
 	.dt_node_to_map = pinconf_generic_dt_node_to_map_all,
-	.dt_free_map = pinctrl_utils_dt_free_map,
+	.dt_free_map = pinctrl_utils_free_map,
 };
 
 static int uniphier_conf_pin_bias_get(struct pinctrl_dev *pctldev,
@@ -665,7 +665,7 @@
 	desc->pmxops = &uniphier_pmxops;
 	desc->confops = &uniphier_confops;
 
-	priv->pctldev = pinctrl_register(desc, dev, priv);
+	priv->pctldev = devm_pinctrl_register(dev, desc, priv);
 	if (IS_ERR(priv->pctldev)) {
 		dev_err(dev, "failed to register UniPhier pinctrl driver\n");
 		return PTR_ERR(priv->pctldev);
@@ -676,13 +676,3 @@
 	return 0;
 }
 EXPORT_SYMBOL_GPL(uniphier_pinctrl_probe);
-
-int uniphier_pinctrl_remove(struct platform_device *pdev)
-{
-	struct uniphier_pinctrl_priv *priv = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(priv->pctldev);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uniphier_pinctrl_remove);
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
index a7056dc..4a0439c 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
@@ -878,7 +878,6 @@
 
 static struct platform_driver ph1_ld4_pinctrl_driver = {
 	.probe = ph1_ld4_pinctrl_probe,
-	.remove = uniphier_pinctrl_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = ph1_ld4_pinctrl_match,
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
index 1824831..150d339 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
@@ -1266,7 +1266,6 @@
 
 static struct platform_driver ph1_ld6b_pinctrl_driver = {
 	.probe = ph1_ld6b_pinctrl_probe,
-	.remove = uniphier_pinctrl_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = ph1_ld6b_pinctrl_match,
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
index ec8e92d..b1f09e6 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
@@ -1552,7 +1552,6 @@
 
 static struct platform_driver ph1_pro4_pinctrl_driver = {
 	.probe = ph1_pro4_pinctrl_probe,
-	.remove = uniphier_pinctrl_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = ph1_pro4_pinctrl_match,
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
index e3d648e..3087f76 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
@@ -1343,7 +1343,6 @@
 
 static struct platform_driver ph1_pro5_pinctrl_driver = {
 	.probe = ph1_pro5_pinctrl_probe,
-	.remove = uniphier_pinctrl_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = ph1_pro5_pinctrl_match,
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
index bc00d75..e868030 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
@@ -1261,7 +1261,6 @@
 
 static struct platform_driver proxstream2_pinctrl_driver = {
 	.probe = proxstream2_pinctrl_probe,
-	.remove = uniphier_pinctrl_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = proxstream2_pinctrl_match,
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
index c3700a3..ceb7a98 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
@@ -786,7 +786,6 @@
 
 static struct platform_driver ph1_sld8_pinctrl_driver = {
 	.probe = ph1_sld8_pinctrl_probe,
-	.remove = uniphier_pinctrl_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = ph1_sld8_pinctrl_match,
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier.h b/drivers/pinctrl/uniphier/pinctrl-uniphier.h
index e1e98b8..a21154f 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier.h
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier.h
@@ -212,6 +212,4 @@
 			   struct pinctrl_desc *desc,
 			   struct uniphier_pinctrl_socdata *socdata);
 
-int uniphier_pinctrl_remove(struct platform_device *pdev);
-
 #endif /* __PINCTRL_UNIPHIER_H__ */
diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c
index 5c261bf..cbc6386 100644
--- a/drivers/pinctrl/vt8500/pinctrl-wmt.c
+++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c
@@ -583,7 +583,7 @@
 
 	data->dev = &pdev->dev;
 
-	data->pctl_dev = pinctrl_register(&wmt_desc, &pdev->dev, data);
+	data->pctl_dev = devm_pinctrl_register(&pdev->dev, &wmt_desc, data);
 	if (IS_ERR(data->pctl_dev)) {
 		dev_err(&pdev->dev, "Failed to register pinctrl\n");
 		return PTR_ERR(data->pctl_dev);
@@ -592,7 +592,7 @@
 	err = gpiochip_add_data(&data->gpio_chip, data);
 	if (err) {
 		dev_err(&pdev->dev, "could not add GPIO chip\n");
-		goto fail_gpio;
+		return err;
 	}
 
 	err = gpiochip_add_pin_range(&data->gpio_chip, dev_name(data->dev),
@@ -606,8 +606,6 @@
 
 fail_range:
 	gpiochip_remove(&data->gpio_chip);
-fail_gpio:
-	pinctrl_unregister(data->pctl_dev);
 	return err;
 }
 
@@ -616,7 +614,6 @@
 	struct wmt_pinctrl_data *data = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&data->gpio_chip);
-	pinctrl_unregister(data->pctl_dev);
 
 	return 0;
 }
diff --git a/drivers/platform/mips/Kconfig b/drivers/platform/mips/Kconfig
index 125e569..b3ae30a 100644
--- a/drivers/platform/mips/Kconfig
+++ b/drivers/platform/mips/Kconfig
@@ -15,10 +15,6 @@
 
 if MIPS_PLATFORM_DEVICES
 
-config MIPS_ACPI
-	bool
-	default y if LOONGSON_MACH3X
-
 config CPU_HWMON
 	tristate "Loongson CPU HWMon Driver"
 	depends on LOONGSON_MACH3X
diff --git a/drivers/platform/mips/Makefile b/drivers/platform/mips/Makefile
index 4341284..8dfd039 100644
--- a/drivers/platform/mips/Makefile
+++ b/drivers/platform/mips/Makefile
@@ -1,2 +1 @@
-obj-$(CONFIG_MIPS_ACPI) += acpi_init.o
 obj-$(CONFIG_CPU_HWMON) += cpu_hwmon.o
diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c
index 4993e19f..4300a55 100644
--- a/drivers/platform/mips/cpu_hwmon.c
+++ b/drivers/platform/mips/cpu_hwmon.c
@@ -20,9 +20,9 @@
 	u32 reg;
 
 	reg = LOONGSON_CHIPTEMP(cpu);
-	if (loongson_sysconf.cputype == Loongson_3A)
+	if ((read_c0_prid() & PRID_REV_MASK) == PRID_REV_LOONGSON3A_R1)
 		reg = (reg >> 8) & 0xff;
-	else if (loongson_sysconf.cputype == Loongson_3B)
+	else
 		reg = ((reg >> 8) & 0xff) - 100;
 
 	return (int)reg * 1000;
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 6804354..0ac520d 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -49,7 +49,9 @@
 	u8	_pad_117[119 - 117];	/* 117-118 */
 	u8	fac119;			/* 119 */
 	u16	hcpua;			/* 120-121 */
-	u8	_pad_122[4096 - 122];	/* 122-4095 */
+	u8	_pad_122[124 - 122];	/* 122-123 */
+	u32	hmfai;			/* 124-127 */
+	u8	_pad_128[4096 - 128];	/* 128-4095 */
 } __packed __aligned(PAGE_SIZE);
 
 static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata;
@@ -155,6 +157,8 @@
 	sclp.mtid = (sccb->fac42 & 0x80) ? (sccb->fac42 & 31) : 0;
 	sclp.mtid_cp = (sccb->fac42 & 0x80) ? (sccb->fac43 & 31) : 0;
 	sclp.mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0;
+
+	sclp.hmfai = sccb->hmfai;
 }
 
 /*
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 520ed1d..4fd7f98 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -144,16 +144,16 @@
 		struct dw_dma_slave *slave = c->tx_param;
 
 		slave->dma_dev = &dma_dev->dev;
-		slave->src_master = 1;
-		slave->dst_master = 0;
+		slave->m_master = 0;
+		slave->p_master = 1;
 	}
 
 	if (c->rx_param) {
 		struct dw_dma_slave *slave = c->rx_param;
 
 		slave->dma_dev = &dma_dev->dev;
-		slave->src_master = 1;
-		slave->dst_master = 0;
+		slave->m_master = 0;
+		slave->p_master = 1;
 	}
 
 	spi_pdata.dma_filter = lpss_dma_filter;
diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c
index 799634b..1146ff4 100644
--- a/drivers/thunderbolt/ctl.c
+++ b/drivers/thunderbolt/ctl.c
@@ -249,7 +249,7 @@
 		 * cfg_read/cfg_write.
 		 */
 		tb_ctl_WARN(ctl,
-			"CFG_ERROR(%llx:%x): Invalid config space of offset\n",
+			"CFG_ERROR(%llx:%x): Invalid config space or offset\n",
 			res->response_route, res->response_port);
 		return;
 	case TB_CFG_ERROR_NO_SUCH_PORT:
diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c
index 0dde34e..2b9602c 100644
--- a/drivers/thunderbolt/eeprom.c
+++ b/drivers/thunderbolt/eeprom.c
@@ -221,7 +221,7 @@
 	u8 micro1:4;
 	u8 micro3;
 
-	/* BYTES 5-6, TODO: verify (find hardware that has these set) */
+	/* BYTES 6-7, TODO: verify (find hardware that has these set) */
 	u8 peer_port_rid:4;
 	u8 unknown3:3;
 	bool has_peer_port:1;
@@ -388,6 +388,11 @@
 		sw->ports[4].link_nr = 1;
 		sw->ports[3].dual_link_port = &sw->ports[4];
 		sw->ports[4].dual_link_port = &sw->ports[3];
+
+		/* Port 5 is inaccessible on this gen 1 controller */
+		if (sw->config.device_id == PCI_DEVICE_ID_INTEL_LIGHT_RIDGE)
+			sw->ports[5].disabled = true;
+
 		return 0;
 	}
 
@@ -444,6 +449,7 @@
 	return tb_drom_parse_entries(sw);
 err:
 	kfree(sw->drom);
+	sw->drom = NULL;
 	return -EIO;
 
 }
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 20a41f7..9c15344 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -37,7 +37,8 @@
  */
 static void ring_interrupt_active(struct tb_ring *ring, bool active)
 {
-	int reg = REG_RING_INTERRUPT_BASE + ring_interrupt_index(ring) / 32;
+	int reg = REG_RING_INTERRUPT_BASE +
+		  ring_interrupt_index(ring) / 32 * 4;
 	int bit = ring_interrupt_index(ring) & 31;
 	int mask = 1 << bit;
 	u32 old, new;
@@ -564,7 +565,7 @@
 	/* cannot fail - table is allocated bin pcim_iomap_regions */
 	nhi->iobase = pcim_iomap_table(pdev)[0];
 	nhi->hop_count = ioread32(nhi->iobase + REG_HOP_COUNT) & 0x3ff;
-	if (nhi->hop_count != 12)
+	if (nhi->hop_count != 12 && nhi->hop_count != 32)
 		dev_warn(&pdev->dev, "unexpected hop count: %d\n",
 			 nhi->hop_count);
 	INIT_WORK(&nhi->interrupt_work, nhi_interrupt_work);
@@ -633,16 +634,24 @@
 static struct pci_device_id nhi_ids[] = {
 	/*
 	 * We have to specify class, the TB bridges use the same device and
-	 * vendor (sub)id.
+	 * vendor (sub)id on gen 1 and gen 2 controllers.
 	 */
 	{
 		.class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0,
-		.vendor = PCI_VENDOR_ID_INTEL, .device = 0x1547,
+		.vendor = PCI_VENDOR_ID_INTEL,
+		.device = PCI_DEVICE_ID_INTEL_LIGHT_RIDGE,
 		.subvendor = 0x2222, .subdevice = 0x1111,
 	},
 	{
 		.class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0,
-		.vendor = PCI_VENDOR_ID_INTEL, .device = 0x156c,
+		.vendor = PCI_VENDOR_ID_INTEL,
+		.device = PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
+		.subvendor = 0x2222, .subdevice = 0x1111,
+	},
+	{
+		.class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0,
+		.vendor = PCI_VENDOR_ID_INTEL,
+		.device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI,
 		.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID,
 	},
 	{ 0,}
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index aeb9829..1e116f5 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -293,9 +293,9 @@
 	if (active) {
 		data = data & 0xFFFFFF83;
 		switch (sw->config.device_id) {
-		case 0x1513:
-		case 0x151a:
-		case 0x1549:
+		case PCI_DEVICE_ID_INTEL_LIGHT_RIDGE:
+		case PCI_DEVICE_ID_INTEL_EAGLE_RIDGE:
+		case PCI_DEVICE_ID_INTEL_PORT_RIDGE:
 			break;
 		default:
 			data |= 4;
@@ -350,7 +350,7 @@
 		return NULL;
 
 	sw->tb = tb;
-	if (tb_cfg_read(tb->ctl, &sw->config, route, 0, 2, 0, 5))
+	if (tb_cfg_read(tb->ctl, &sw->config, route, 0, TB_CFG_SWITCH, 0, 5))
 		goto err;
 	tb_info(tb,
 		"initializing Switch at %#llx (depth: %d, up port: %d)\n",
@@ -370,7 +370,9 @@
 		tb_sw_warn(sw, "unknown switch vendor id %#x\n",
 			   sw->config.vendor_id);
 
-	if (sw->config.device_id != 0x1547 && sw->config.device_id != 0x1549)
+	if (sw->config.device_id != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE &&
+	    sw->config.device_id != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C &&
+	    sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE)
 		tb_sw_warn(sw, "unsupported switch device id %#x\n",
 			   sw->config.device_id);
 
@@ -425,9 +427,9 @@
 }
 
 /**
- * tb_sw_set_unpplugged() - set is_unplugged on switch and downstream switches
+ * tb_sw_set_unplugged() - set is_unplugged on switch and downstream switches
  */
-void tb_sw_set_unpplugged(struct tb_switch *sw)
+void tb_sw_set_unplugged(struct tb_switch *sw)
 {
 	int i;
 	if (sw == sw->tb->root_switch) {
@@ -441,7 +443,7 @@
 	sw->is_unplugged = true;
 	for (i = 0; i <= sw->config.max_port_number; i++) {
 		if (!tb_is_upstream_port(&sw->ports[i]) && sw->ports[i].remote)
-			tb_sw_set_unpplugged(sw->ports[i].remote->sw);
+			tb_sw_set_unplugged(sw->ports[i].remote->sw);
 	}
 }
 
@@ -483,7 +485,7 @@
 			|| tb_switch_resume(port->remote->sw)) {
 			tb_port_warn(port,
 				     "lost during suspend, disconnecting\n");
-			tb_sw_set_unpplugged(port->remote->sw);
+			tb_sw_set_unplugged(port->remote->sw);
 		}
 	}
 	return 0;
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index d2c3fe3..24b6d30 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -246,7 +246,7 @@
 	if (ev->unplug) {
 		if (port->remote) {
 			tb_port_info(port, "unplugged\n");
-			tb_sw_set_unpplugged(port->remote->sw);
+			tb_sw_set_unplugged(port->remote->sw);
 			tb_free_invalid_tunnels(tb);
 			tb_switch_free(port->remote->sw);
 			port->remote = NULL;
diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h
index 8b0d7cf..61d57ba 100644
--- a/drivers/thunderbolt/tb.h
+++ b/drivers/thunderbolt/tb.h
@@ -226,7 +226,7 @@
 void tb_switch_suspend(struct tb_switch *sw);
 int tb_switch_resume(struct tb_switch *sw);
 int tb_switch_reset(struct tb *tb, u64 route);
-void tb_sw_set_unpplugged(struct tb_switch *sw);
+void tb_sw_set_unplugged(struct tb_switch *sw);
 struct tb_switch *get_switch_at_route(struct tb_switch *sw, u64 route);
 
 int tb_wait_for_port(struct tb_port *port, bool wait_if_unplugged);
diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h
index 6577af7..1e2a4a8 100644
--- a/drivers/thunderbolt/tb_regs.h
+++ b/drivers/thunderbolt/tb_regs.h
@@ -30,7 +30,7 @@
 	TB_CAP_I2C		= 0x0005,
 	TB_CAP_PLUG_EVENTS	= 0x0105, /* also EEPROM */
 	TB_CAP_TIME2		= 0x0305,
-	TB_CAL_IECS		= 0x0405,
+	TB_CAP_IECS		= 0x0405,
 	TB_CAP_LINK_CONTROLLER	= 0x0605, /* also IECS */
 };
 
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 98862aa..5eea74d 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1454,13 +1454,13 @@
 		return -EINVAL;
 	}
 
-	rx_param->src_master = 1;
-	rx_param->dst_master = 0;
+	rx_param->m_master = 0;
+	rx_param->p_master = 1;
 
 	dma->rxconf.src_maxburst = 16;
 
-	tx_param->src_master = 1;
-	tx_param->dst_master = 0;
+	tx_param->m_master = 0;
+	tx_param->p_master = 1;
 
 	dma->txconf.dst_maxburst = 16;
 
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 13d4ed6..7711b26 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -900,6 +900,27 @@
 		controller serial port as your console (you want this!),
 		say Y.  Otherwise, say N.
 
+config SERIAL_PIC32
+	tristate "Microchip PIC32 serial support"
+	depends on MACH_PIC32
+	select SERIAL_CORE
+	help
+	  If you have a PIC32, this driver supports the serial ports.
+
+	  Say Y or M to use PIC32 serial ports, otherwise say N. Note that
+	  to use a serial port as a console, the driver must be built into the
+	  kernel, not as a module.
+
+config SERIAL_PIC32_CONSOLE
+	bool "PIC32 serial console support"
+	depends on SERIAL_PIC32
+	select SERIAL_CORE_CONSOLE
+	help
+	  If you have a PIC32, this driver supports putting a console on one
+	  of the serial ports.
+
+	  Say Y to use the PIC32 console, otherwise say N.
+
 config SERIAL_MPC52xx
 	tristate "Freescale MPC52xx/MPC512x family PSC serial support"
 	depends on PPC_MPC52xx || PPC_MPC512x
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 8c261ad..74914aa 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -91,6 +91,7 @@
 obj-$(CONFIG_SERIAL_SPRD) += sprd_serial.o
 obj-$(CONFIG_SERIAL_STM32)	+= stm32-usart.o
 obj-$(CONFIG_SERIAL_MVEBU_UART)	+= mvebu-uart.o
+obj-$(CONFIG_SERIAL_PIC32)	+= pic32_uart.o
 
 # GPIOLIB helpers for modem control lines
 obj-$(CONFIG_SERIAL_MCTRL_GPIO)	+= serial_mctrl_gpio.o
diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c
new file mode 100644
index 0000000..62a43bf
--- /dev/null
+++ b/drivers/tty/serial/pic32_uart.c
@@ -0,0 +1,960 @@
+/*
+ * PIC32 Integrated Serial Driver.
+ *
+ * Copyright (C) 2015 Microchip Technology, Inc.
+ *
+ * Authors:
+ *   Sorin-Andrei Pistirica <andrei.pistirica@microchip.com>
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/of_gpio.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/console.h>
+#include <linux/clk.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_core.h>
+#include <linux/delay.h>
+
+#include <asm/mach-pic32/pic32.h>
+#include "pic32_uart.h"
+
+/* UART name and device definitions */
+#define PIC32_DEV_NAME		"pic32-uart"
+#define PIC32_MAX_UARTS		6
+#define PIC32_SDEV_NAME		"ttyPIC"
+
+/* pic32_sport pointer for console use */
+static struct pic32_sport *pic32_sports[PIC32_MAX_UARTS];
+
+static inline void pic32_wait_deplete_txbuf(struct pic32_sport *sport)
+{
+	/* wait for tx empty, otherwise chars will be lost or corrupted */
+	while (!(pic32_uart_readl(sport, PIC32_UART_STA) & PIC32_UART_STA_TRMT))
+		udelay(1);
+}
+
+static inline int pic32_enable_clock(struct pic32_sport *sport)
+{
+	int ret = clk_prepare_enable(sport->clk);
+
+	if (ret)
+		return ret;
+
+	sport->ref_clk++;
+	return 0;
+}
+
+static inline void pic32_disable_clock(struct pic32_sport *sport)
+{
+	sport->ref_clk--;
+	clk_disable_unprepare(sport->clk);
+}
+
+/* serial core request to check if uart tx buffer is empty */
+static unsigned int pic32_uart_tx_empty(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	u32 val = pic32_uart_readl(sport, PIC32_UART_STA);
+
+	return (val & PIC32_UART_STA_TRMT) ? 1 : 0;
+}
+
+/* serial core request to set UART outputs */
+static void pic32_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+
+	/* set loopback mode */
+	if (mctrl & TIOCM_LOOP)
+		pic32_uart_writel(sport, PIC32_SET(PIC32_UART_MODE),
+					PIC32_UART_MODE_LPBK);
+	else
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_LPBK);
+}
+
+/* get the state of CTS input pin for this port */
+static unsigned int get_cts_state(struct pic32_sport *sport)
+{
+	/* read and invert UxCTS */
+	if (gpio_is_valid(sport->cts_gpio))
+		return !gpio_get_value(sport->cts_gpio);
+
+	return 1;
+}
+
+/* serial core request to return the state of misc UART input pins */
+static unsigned int pic32_uart_get_mctrl(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	unsigned int mctrl = 0;
+
+	if (!sport->hw_flow_ctrl)
+		mctrl |= TIOCM_CTS;
+	else if (get_cts_state(sport))
+		mctrl |= TIOCM_CTS;
+
+	/* DSR and CD are not supported in PIC32, so return 1
+	 * RI is not supported in PIC32, so return 0
+	 */
+	mctrl |= TIOCM_CD;
+	mctrl |= TIOCM_DSR;
+
+	return mctrl;
+}
+
+/* stop tx and start tx are not called in pairs, therefore a flag indicates
+ * the status of irq to control the irq-depth.
+ */
+static inline void pic32_uart_irqtxen(struct pic32_sport *sport, u8 en)
+{
+	if (en && !tx_irq_enabled(sport)) {
+		enable_irq(sport->irq_tx);
+		tx_irq_enabled(sport) = 1;
+	} else if (!en && tx_irq_enabled(sport)) {
+		/* use disable_irq_nosync() and not disable_irq() to avoid self
+		 * imposed deadlock by not waiting for irq handler to end,
+		 * since this callback is called from interrupt context.
+		 */
+		disable_irq_nosync(sport->irq_tx);
+		tx_irq_enabled(sport) = 0;
+	}
+}
+
+/* serial core request to disable tx ASAP (used for flow control) */
+static void pic32_uart_stop_tx(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+
+	if (!(pic32_uart_readl(sport, PIC32_UART_MODE) & PIC32_UART_MODE_ON))
+		return;
+
+	if (!(pic32_uart_readl(sport, PIC32_UART_STA) & PIC32_UART_STA_UTXEN))
+		return;
+
+	/* wait for tx empty */
+	pic32_wait_deplete_txbuf(sport);
+
+	pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_STA),
+				PIC32_UART_STA_UTXEN);
+	pic32_uart_irqtxen(sport, 0);
+}
+
+/* serial core request to (re)enable tx */
+static void pic32_uart_start_tx(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+
+	pic32_uart_irqtxen(sport, 1);
+	pic32_uart_writel(sport, PIC32_SET(PIC32_UART_STA),
+				PIC32_UART_STA_UTXEN);
+}
+
+/* serial core request to stop rx, called before port shutdown */
+static void pic32_uart_stop_rx(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+
+	/* disable rx interrupts */
+	disable_irq(sport->irq_rx);
+
+	/* receiver Enable bit OFF */
+	pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_STA),
+				PIC32_UART_STA_URXEN);
+}
+
+/* serial core request to start/stop emitting break char */
+static void pic32_uart_break_ctl(struct uart_port *port, int ctl)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	if (ctl)
+		pic32_uart_writel(sport, PIC32_SET(PIC32_UART_STA),
+					PIC32_UART_STA_UTXBRK);
+	else
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_STA),
+					PIC32_UART_STA_UTXBRK);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/* get port type in string format */
+static const char *pic32_uart_type(struct uart_port *port)
+{
+	return (port->type == PORT_PIC32) ? PIC32_DEV_NAME : NULL;
+}
+
+/* read all chars in rx fifo and send them to core */
+static void pic32_uart_do_rx(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	struct tty_port *tty;
+	unsigned int max_count;
+
+	/* limit number of char read in interrupt, should not be
+	 * higher than fifo size anyway since we're much faster than
+	 * serial port
+	 */
+	max_count = PIC32_UART_RX_FIFO_DEPTH;
+
+	spin_lock(&port->lock);
+
+	tty = &port->state->port;
+
+	do {
+		u32 sta_reg, c;
+		char flag;
+
+		/* get overrun/fifo empty information from status register */
+		sta_reg = pic32_uart_readl(sport, PIC32_UART_STA);
+		if (unlikely(sta_reg & PIC32_UART_STA_OERR)) {
+
+			/* fifo reset is required to clear interrupt */
+			pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_STA),
+						PIC32_UART_STA_OERR);
+
+			port->icount.overrun++;
+			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		}
+
+		/* Can at least one more character be read? */
+		if (!(sta_reg & PIC32_UART_STA_URXDA))
+			break;
+
+		/* read the character and increment the rx counter */
+		c = pic32_uart_readl(sport, PIC32_UART_RX);
+
+		port->icount.rx++;
+		flag = TTY_NORMAL;
+		c &= 0xff;
+
+		if (unlikely((sta_reg & PIC32_UART_STA_PERR) ||
+			     (sta_reg & PIC32_UART_STA_FERR))) {
+
+			/* do stats first */
+			if (sta_reg & PIC32_UART_STA_PERR)
+				port->icount.parity++;
+			if (sta_reg & PIC32_UART_STA_FERR)
+				port->icount.frame++;
+
+			/* update flag wrt read_status_mask */
+			sta_reg &= port->read_status_mask;
+
+			if (sta_reg & PIC32_UART_STA_FERR)
+				flag = TTY_FRAME;
+			if (sta_reg & PIC32_UART_STA_PERR)
+				flag = TTY_PARITY;
+		}
+
+		if (uart_handle_sysrq_char(port, c))
+			continue;
+
+		if ((sta_reg & port->ignore_status_mask) == 0)
+			tty_insert_flip_char(tty, c, flag);
+
+	} while (--max_count);
+
+	spin_unlock(&port->lock);
+
+	tty_flip_buffer_push(tty);
+}
+
+/* Push pending transmit data into the hardware tx fifo.
+ *
+ * A queued x_char is sent on its own and takes priority over buffered data.
+ * If transmission is stopped, pic32_uart_stop_tx() is called instead.
+ * Otherwise characters are moved from the circular buffer to the fifo until
+ * the fifo reports full (STA.UTXBF), the buffer drains, or
+ * PIC32_UART_TX_FIFO_DEPTH characters have been written. The tx interrupt
+ * is switched off once the circular buffer is empty.
+ */
+static void pic32_uart_do_tx(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	struct circ_buf *xmit = &port->state->xmit;
+	unsigned int budget = PIC32_UART_TX_FIFO_DEPTH;
+
+	/* an out-of-band character goes out alone */
+	if (port->x_char) {
+		pic32_uart_writel(sport, PIC32_UART_TX, port->x_char);
+		port->icount.tx++;
+		port->x_char = 0;
+		return;
+	}
+
+	if (uart_tx_stopped(port)) {
+		pic32_uart_stop_tx(port);
+		return;
+	}
+
+	if (!uart_circ_empty(xmit)) {
+		for (;;) {
+			unsigned int c;
+
+			/* stop as soon as the hardware fifo is full */
+			if (pic32_uart_readl(sport, PIC32_UART_STA) &
+			    PIC32_UART_STA_UTXBF)
+				break;
+
+			c = xmit->buf[xmit->tail];
+			pic32_uart_writel(sport, PIC32_UART_TX, c);
+
+			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+			port->icount.tx++;
+
+			/* done when drained, or when the per-call budget
+			 * is used up (avoids lingering here for too long)
+			 */
+			if (uart_circ_empty(xmit) || --budget == 0)
+				break;
+		}
+
+		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+			uart_write_wakeup(port);
+	}
+
+	/* nothing left to send: no need for further tx interrupts */
+	if (uart_circ_empty(xmit))
+		pic32_uart_irqtxen(sport, 0);
+}
+
+/* RX interrupt handler: dev_id is the uart_port; all receive work is
+ * delegated to pic32_uart_do_rx().
+ */
+static irqreturn_t pic32_uart_rx_interrupt(int irq, void *dev_id)
+{
+	pic32_uart_do_rx((struct uart_port *)dev_id);
+
+	return IRQ_HANDLED;
+}
+
+/* TX interrupt handler: refill the tx fifo with the port lock held and
+ * local interrupts disabled.
+ */
+static irqreturn_t pic32_uart_tx_interrupt(int irq, void *dev_id)
+{
+	struct uart_port *uport = dev_id;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uport->lock, irqflags);
+	pic32_uart_do_tx(uport);
+	spin_unlock_irqrestore(&uport->lock, irqflags);
+
+	return IRQ_HANDLED;
+}
+
+/* FAULT interrupt handler
+ *
+ * Intentionally a stub that only acknowledges the interrupt to the irq
+ * core by returning IRQ_HANDLED; it touches no hardware.
+ */
+static irqreturn_t pic32_uart_fault_interrupt(int irq, void *dev_id)
+{
+	/* do nothing: pic32_uart_do_rx() handles faults. */
+	return IRQ_HANDLED;
+}
+
+/* enable rx & tx operation on uart: set the transmitter/receiver enable
+ * bits in STA first, then turn the module on through MODE.ON
+ */
+static void pic32_uart_en_and_unmask(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	const u32 rxtx_en = PIC32_UART_STA_UTXEN | PIC32_UART_STA_URXEN;
+
+	pic32_uart_writel(sport, PIC32_SET(PIC32_UART_STA), rxtx_en);
+	pic32_uart_writel(sport, PIC32_SET(PIC32_UART_MODE),
+			  PIC32_UART_MODE_ON);
+}
+
+/* disable rx & tx operation on uart: drain the transmitter, clear the
+ * transmitter/receiver enable bits in STA, then clear MODE.ON
+ */
+static void pic32_uart_dsbl_and_mask(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	const u32 rxtx_en = PIC32_UART_STA_UTXEN | PIC32_UART_STA_URXEN;
+
+	/* chars still in flight would be lost or corrupted otherwise */
+	pic32_wait_deplete_txbuf(sport);
+
+	pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_STA), rxtx_en);
+	pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+			  PIC32_UART_MODE_ON);
+}
+
+/* serial core request to initialize uart and start rx operation
+ *
+ * Enables the peripheral clock, resets the MODE/STA registers, programs the
+ * default baud rate and requests the fault, rx and tx interrupts. All three
+ * are requested with IRQ_NOAUTOEN set; only the rx interrupt is enabled
+ * here, after the uart itself has been enabled.
+ *
+ * Returns 0 on success or a negative errno. On failure everything acquired
+ * so far is released in reverse order. Each unwind label undoes exactly one
+ * step, so free_irq() is never called for an irq whose request failed, no
+ * irq name is freed twice, and the clock is disabled again.
+ */
+static int pic32_uart_startup(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	u32 dflt_baud = (port->uartclk / PIC32_UART_DFLT_BRATE / 16) - 1;
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+
+	ret = pic32_enable_clock(sport);
+	if (ret) {
+		local_irq_restore(flags);
+		goto out_done;
+	}
+
+	/* clear status and mode registers */
+	pic32_uart_writel(sport, PIC32_UART_MODE, 0);
+	pic32_uart_writel(sport, PIC32_UART_STA, 0);
+
+	/* disable uart and mask all interrupts */
+	pic32_uart_dsbl_and_mask(port);
+
+	/* set default baud */
+	pic32_uart_writel(sport, PIC32_UART_BRG, dflt_baud);
+
+	local_irq_restore(flags);
+
+	/* Each UART of a PIC32 has three interrupts therefore,
+	 * we setup driver to register the 3 irqs for the device.
+	 *
+	 * For each irq request_irq() is called with interrupt disabled.
+	 * And the irq is enabled as soon as we are ready to handle them.
+	 */
+	tx_irq_enabled(sport) = 0;
+
+	sport->irq_fault_name = kasprintf(GFP_KERNEL, "%s%d-fault",
+					  pic32_uart_type(port),
+					  sport->idx);
+	if (!sport->irq_fault_name) {
+		dev_err(port->dev, "%s: kasprintf err!\n", __func__);
+		ret = -ENOMEM;
+		goto out_disable_clk;
+	}
+	irq_set_status_flags(sport->irq_fault, IRQ_NOAUTOEN);
+	ret = request_irq(sport->irq_fault, pic32_uart_fault_interrupt,
+			  sport->irqflags_fault, sport->irq_fault_name, port);
+	if (ret) {
+		dev_err(port->dev, "%s: request irq(%d) err! ret:%d name:%s\n",
+			__func__, sport->irq_fault, ret,
+			pic32_uart_type(port));
+		goto out_free_fault_name;
+	}
+
+	sport->irq_rx_name = kasprintf(GFP_KERNEL, "%s%d-rx",
+				       pic32_uart_type(port),
+				       sport->idx);
+	if (!sport->irq_rx_name) {
+		dev_err(port->dev, "%s: kasprintf err!\n", __func__);
+		ret = -ENOMEM;
+		goto out_free_fault_irq;
+	}
+	irq_set_status_flags(sport->irq_rx, IRQ_NOAUTOEN);
+	ret = request_irq(sport->irq_rx, pic32_uart_rx_interrupt,
+			  sport->irqflags_rx, sport->irq_rx_name, port);
+	if (ret) {
+		dev_err(port->dev, "%s: request irq(%d) err! ret:%d name:%s\n",
+			__func__, sport->irq_rx, ret,
+			pic32_uart_type(port));
+		goto out_free_rx_name;
+	}
+
+	sport->irq_tx_name = kasprintf(GFP_KERNEL, "%s%d-tx",
+				       pic32_uart_type(port),
+				       sport->idx);
+	if (!sport->irq_tx_name) {
+		dev_err(port->dev, "%s: kasprintf err!\n", __func__);
+		ret = -ENOMEM;
+		goto out_free_rx_irq;
+	}
+	irq_set_status_flags(sport->irq_tx, IRQ_NOAUTOEN);
+	ret = request_irq(sport->irq_tx, pic32_uart_tx_interrupt,
+			  sport->irqflags_tx, sport->irq_tx_name, port);
+	if (ret) {
+		dev_err(port->dev, "%s: request irq(%d) err! ret:%d name:%s\n",
+			__func__, sport->irq_tx, ret,
+			pic32_uart_type(port));
+		goto out_free_tx_name;
+	}
+
+	local_irq_save(flags);
+
+	/* set rx interrupt on first receive */
+	pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_STA),
+			PIC32_UART_STA_URXISEL1 | PIC32_UART_STA_URXISEL0);
+
+	/* set interrupt on empty */
+	pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_STA),
+			PIC32_UART_STA_UTXISEL1);
+
+	/* enable all interrupts and enable uart */
+	pic32_uart_en_and_unmask(port);
+
+	/* pairs with the local_irq_save() above; without it this function
+	 * would return with local interrupts still disabled
+	 */
+	local_irq_restore(flags);
+
+	enable_irq(sport->irq_rx);
+
+	return 0;
+
+	/* Unwind ladder. free_irq() must use the same dev_id cookie (port)
+	 * that was handed to request_irq(), and an irq is released before
+	 * the name string it was registered with is freed.
+	 */
+out_free_tx_name:
+	kfree(sport->irq_tx_name);
+out_free_rx_irq:
+	free_irq(sport->irq_rx, port);
+out_free_rx_name:
+	kfree(sport->irq_rx_name);
+out_free_fault_irq:
+	free_irq(sport->irq_fault, port);
+out_free_fault_name:
+	kfree(sport->irq_fault_name);
+out_disable_clk:
+	pic32_disable_clock(sport);
+out_done:
+	return ret;
+}
+
+/* serial core request to flush & disable uart
+ *
+ * Disables the uart under the port lock, calls pic32_disable_clock() and
+ * releases the three interrupts together with the name strings that
+ * pic32_uart_startup() allocated with kasprintf(). Without freeing them,
+ * every startup/shutdown cycle would leak three strings.
+ */
+static void pic32_uart_shutdown(struct uart_port *port)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	unsigned long flags;
+
+	/* disable uart */
+	spin_lock_irqsave(&port->lock, flags);
+	pic32_uart_dsbl_and_mask(port);
+	spin_unlock_irqrestore(&port->lock, flags);
+	pic32_disable_clock(sport);
+
+	/* free all 3 interrupts for this UART; a name is freed only after
+	 * its irq has been released, and the pointer is cleared so a stale
+	 * name cannot be freed again
+	 */
+	free_irq(sport->irq_fault, port);
+	kfree(sport->irq_fault_name);
+	sport->irq_fault_name = NULL;
+
+	free_irq(sport->irq_tx, port);
+	kfree(sport->irq_tx_name);
+	sport->irq_tx_name = NULL;
+
+	free_irq(sport->irq_rx, port);
+	kfree(sport->irq_rx_name);
+	sport->irq_rx_name = NULL;
+}
+
+/* serial core request to change current uart setting
+ *
+ * @port: port to reconfigure
+ * @new: requested settings; updated in place to reflect what was actually
+ *       programmed (CS8 forced, CMSPAR cleared, baud rate re-encoded)
+ * @old: previous settings, passed on to uart_get_baud_rate()
+ *
+ * The whole sequence runs under the port lock with the uart disabled;
+ * the uart is re-enabled before the lock is dropped.
+ */
+static void pic32_uart_set_termios(struct uart_port *port,
+				   struct ktermios *new,
+				   struct ktermios *old)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+	unsigned int baud;
+	unsigned int quot;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* disable uart and mask all interrupts while changing speed */
+	pic32_uart_dsbl_and_mask(port);
+
+	/* stop bit options: STSEL is set when CSTOPB is requested */
+	if (new->c_cflag & CSTOPB)
+		pic32_uart_writel(sport, PIC32_SET(PIC32_UART_MODE),
+					PIC32_UART_MODE_STSEL);
+	else
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_STSEL);
+
+	/* parity options: PDSEL1 only for odd parity, PDSEL0 only for even
+	 * parity, both cleared when parity is disabled
+	 */
+	if (new->c_cflag & PARENB) {
+		if (new->c_cflag & PARODD) {
+			pic32_uart_writel(sport, PIC32_SET(PIC32_UART_MODE),
+					PIC32_UART_MODE_PDSEL1);
+			pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_PDSEL0);
+		} else {
+			pic32_uart_writel(sport, PIC32_SET(PIC32_UART_MODE),
+					PIC32_UART_MODE_PDSEL0);
+			pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_PDSEL1);
+		}
+	} else {
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_PDSEL1 |
+					PIC32_UART_MODE_PDSEL0);
+	}
+	/* if hw flow ctrl, then the pins must be specified in device tree */
+	if ((new->c_cflag & CRTSCTS) && sport->hw_flow_ctrl) {
+		/* enable hardware flow control: UEN1 set, UEN0 and RTSMD
+		 * cleared
+		 */
+		pic32_uart_writel(sport, PIC32_SET(PIC32_UART_MODE),
+					PIC32_UART_MODE_UEN1);
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_UEN0);
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_RTSMD);
+	} else {
+		/* disable hardware flow control: UEN1, UEN0 and RTSMD all
+		 * cleared
+		 */
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_UEN1);
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_UEN0);
+		pic32_uart_writel(sport, PIC32_CLR(PIC32_UART_MODE),
+					PIC32_UART_MODE_RTSMD);
+	}
+
+	/* Always 8-bit: report CS8 back to the caller whatever was asked */
+	new->c_cflag |= CS8;
+
+	/* Mark/Space parity is not supported */
+	new->c_cflag &= ~CMSPAR;
+
+	/* update baud: the rate is capped at uartclk / 16 and the BRG
+	 * register is loaded with the divisor minus one
+	 */
+	baud = uart_get_baud_rate(port, new, old, 0, port->uartclk / 16);
+	quot = uart_get_divisor(port, baud) - 1;
+	pic32_uart_writel(sport, PIC32_UART_BRG, quot);
+	uart_update_timeout(port, new->c_cflag, baud);
+
+	/* write the rate actually chosen back into the termios, unless the
+	 * termios encodes a zero rate
+	 */
+	if (tty_termios_baud_rate(new))
+		tty_termios_encode_baud_rate(new, baud, baud);
+
+	/* enable uart */
+	pic32_uart_en_and_unmask(port);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/* serial core request to claim uart iomem
+ *
+ * Reserves the memory region described by the platform device's first
+ * IORESOURCE_MEM resource and maps it into port->membase.
+ *
+ * Returns 0 on success, -EINVAL when the resource is missing, -EBUSY when
+ * the region is already claimed and -ENOMEM when the mapping fails (the
+ * region is released again in that case).
+ */
+static int pic32_uart_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	struct resource *res_mem;
+
+	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (unlikely(!res_mem))
+		return -EINVAL;
+
+	if (!request_mem_region(port->mapbase, resource_size(res_mem),
+				"pic32_uart_mem"))
+		return -EBUSY;
+
+	port->membase = devm_ioremap_nocache(port->dev, port->mapbase,
+						resource_size(res_mem));
+	if (port->membase)
+		return 0;
+
+	/* mapping failed: give the region back before reporting it */
+	dev_err(port->dev, "Unable to map registers\n");
+	release_mem_region(port->mapbase, resource_size(res_mem));
+
+	return -ENOMEM;
+}
+
+/* serial core request to release uart iomem
+ *
+ * Releases the memory region claimed by pic32_uart_request_port(); does
+ * nothing when the platform device has no IORESOURCE_MEM resource.
+ */
+static void pic32_uart_release_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	struct resource *res_mem = platform_get_resource(pdev,
+							 IORESOURCE_MEM, 0);
+	unsigned int len;
+
+	if (likely(res_mem)) {
+		len = resource_size(res_mem);
+		release_mem_region(port->mapbase, len);
+	}
+}
+
+/* serial core request to do any port required auto-configuration
+ *
+ * Only UART_CONFIG_TYPE is acted upon: the port type is set to PORT_PIC32
+ * once the port's iomem has been claimed successfully.
+ */
+static void pic32_uart_config_port(struct uart_port *port, int flags)
+{
+	if (!(flags & UART_CONFIG_TYPE))
+		return;
+
+	if (!pic32_uart_request_port(port))
+		port->type = PORT_PIC32;
+}
+
+/* serial core request to check that port information in serinfo are
+ * suitable: type, irq, io type and base address must all match the port,
+ * otherwise -EINVAL is returned
+ */
+static int pic32_uart_verify_port(struct uart_port *port,
+				  struct serial_struct *serinfo)
+{
+	bool matches = port->type == PORT_PIC32 &&
+		       port->irq == serinfo->irq &&
+		       port->iotype == serinfo->io_type &&
+		       port->mapbase == (unsigned long)serinfo->iomem_base;
+
+	return matches ? 0 : -EINVAL;
+}
+
+/* serial core callbacks, grouped by purpose */
+static const struct uart_ops pic32_uart_ops = {
+	/* port life cycle and configuration */
+	.startup	= pic32_uart_startup,
+	.shutdown	= pic32_uart_shutdown,
+	.set_termios	= pic32_uart_set_termios,
+	.type		= pic32_uart_type,
+	.request_port	= pic32_uart_request_port,
+	.release_port	= pic32_uart_release_port,
+	.config_port	= pic32_uart_config_port,
+	.verify_port	= pic32_uart_verify_port,
+	/* data path */
+	.tx_empty	= pic32_uart_tx_empty,
+	.start_tx	= pic32_uart_start_tx,
+	.stop_tx	= pic32_uart_stop_tx,
+	.stop_rx	= pic32_uart_stop_rx,
+	/* modem control and break */
+	.get_mctrl	= pic32_uart_get_mctrl,
+	.set_mctrl	= pic32_uart_set_mctrl,
+	.break_ctl	= pic32_uart_break_ctl,
+};
+
+#ifdef CONFIG_SERIAL_PIC32_CONSOLE
+/* output given char
+ *
+ * The character is silently dropped unless both the uart (MODE.ON) and its
+ * transmitter (STA.UTXEN) are enabled. Otherwise wait for the tx buffer to
+ * drain, then write the low 8 bits of ch to the tx register.
+ */
+static void pic32_console_putchar(struct uart_port *port, int ch)
+{
+	struct pic32_sport *sport = to_pic32_sport(port);
+
+	if (!(pic32_uart_readl(sport, PIC32_UART_MODE) & PIC32_UART_MODE_ON) ||
+	    !(pic32_uart_readl(sport, PIC32_UART_STA) & PIC32_UART_STA_UTXEN))
+		return;
+
+	/* wait for tx empty */
+	pic32_wait_deplete_txbuf(sport);
+
+	pic32_uart_writel(sport, PIC32_UART_TX, ch & 0xff);
+}
+
+/* console core request to output given string on the uart selected by
+ * co->index
+ */
+static void pic32_console_write(struct console *co, const char *s,
+				unsigned int count)
+{
+	struct pic32_sport *sport = pic32_sports[co->index];
+
+	/* uart_console_write() takes care of the \n -> \r\n expansion */
+	uart_console_write(pic32_get_port(sport), s, count,
+			   pic32_console_putchar);
+}
+
+/* console core request to setup given console, find matching uart
+ * port and setup it.
+ *
+ * Defaults to 115200 baud, 8 bits, no parity, no flow control unless
+ * overridden by @options. Returns -ENODEV when co->index is out of range
+ * or no port is registered for it, the pic32_enable_clock() error if the
+ * clock cannot be enabled, otherwise the result of uart_set_options().
+ */
+static int pic32_console_setup(struct console *co, char *options)
+{
+	struct pic32_sport *sport;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+	int err;
+
+	if (unlikely(co->index < 0 || co->index >= PIC32_MAX_UARTS))
+		return -ENODEV;
+
+	sport = pic32_sports[co->index];
+	if (!sport)
+		return -ENODEV;
+
+	err = pic32_enable_clock(sport);
+	if (err)
+		return err;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(pic32_get_port(sport), co, baud, parity,
+				bits, flow);
+}
+
+/* forward declaration: the console refers to the uart driver, which is
+ * defined further down and in turn refers back to the console
+ */
+static struct uart_driver pic32_uart_driver;
+
+/* console descriptor; index -1 leaves the port selection open */
+static struct console pic32_console = {
+	.name		= PIC32_SDEV_NAME,
+	.index		= -1,
+	.flags		= CON_PRINTBUFFER,
+	.data		= &pic32_uart_driver,
+	.setup		= pic32_console_setup,
+	.write		= pic32_console_write,
+	.device		= uart_console_device,
+};
+#define PIC32_SCONSOLE (&pic32_console)
+
+/* console initcall: register the pic32 console; always returns 0 */
+static int __init pic32_console_init(void)
+{
+	register_console(&pic32_console);
+	return 0;
+}
+console_initcall(pic32_console_init);
+
+/* true when the port has a console attached whose index equals the
+ * port's line number
+ */
+static inline bool is_pic32_console_port(struct uart_port *port)
+{
+	struct console *con = port->cons;
+
+	return con && con->index == port->line;
+}
+
+/*
+ * Late console initialization: register the console again unless it is
+ * already flagged CON_ENABLED. Always returns 0.
+ */
+static int __init pic32_late_console_init(void)
+{
+	if (pic32_console.flags & CON_ENABLED)
+		return 0;
+
+	register_console(&pic32_console);
+
+	return 0;
+}
+
+core_initcall(pic32_late_console_init);
+
+#else
+#define PIC32_SCONSOLE NULL
+#endif
+
+/* uart driver descriptor handed to the serial core; .cons is NULL when
+ * CONFIG_SERIAL_PIC32_CONSOLE is not set
+ */
+static struct uart_driver pic32_uart_driver = {
+	.owner			= THIS_MODULE,
+	.nr			= PIC32_MAX_UARTS,
+	.cons			= PIC32_SCONSOLE,
+	.dev_name		= PIC32_SDEV_NAME,
+	.driver_name		= PIC32_DEV_NAME,
+};
+
+/* Platform probe: build a pic32_sport for the device-tree node and add it
+ * to the serial core.
+ *
+ * The port index comes from the "serial" alias; the three interrupts are
+ * taken from the node in the order fault, rx, tx. An optional "cts-gpios"
+ * property enables hardware flow control support.
+ *
+ * Returns 0 on success or a negative errno. The sport and the GPIO are
+ * device-managed, so error paths only have to undo what is not: the
+ * pic32_sports[] slot is cleared again when the port cannot be added, so
+ * it never points at memory that devres is about to free.
+ */
+static int pic32_uart_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct pic32_sport *sport;
+	int uart_idx = 0;
+	struct resource *res_mem;
+	struct uart_port *port;
+	int ret;
+
+	uart_idx = of_alias_get_id(np, "serial");
+	if (uart_idx < 0 || uart_idx >= PIC32_MAX_UARTS)
+		return -EINVAL;
+
+	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res_mem)
+		return -EINVAL;
+
+	sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL);
+	if (!sport)
+		return -ENOMEM;
+
+	sport->idx		= uart_idx;
+	sport->irq_fault	= irq_of_parse_and_map(np, 0);
+	sport->irqflags_fault	= IRQF_NO_THREAD;
+	sport->irq_rx		= irq_of_parse_and_map(np, 1);
+	sport->irqflags_rx	= IRQF_NO_THREAD;
+	sport->irq_tx		= irq_of_parse_and_map(np, 2);
+	sport->irqflags_tx	= IRQF_NO_THREAD;
+	sport->clk		= devm_clk_get(&pdev->dev, NULL);
+	sport->cts_gpio		= -EINVAL;
+	sport->dev		= &pdev->dev;
+
+	/* devm_clk_get() reports failure as an ERR_PTR; it must not reach
+	 * clk_get_rate() below
+	 */
+	if (IS_ERR(sport->clk)) {
+		ret = PTR_ERR(sport->clk);
+		dev_err(&pdev->dev, "error getting uart clock: %d\n", ret);
+		goto err;
+	}
+
+	/* Hardware flow control: gpios
+	 * !Note: Basically, CTS is needed for reading the status.
+	 */
+	sport->hw_flow_ctrl = false;
+	sport->cts_gpio = of_get_named_gpio(np, "cts-gpios", 0);
+	if (gpio_is_valid(sport->cts_gpio)) {
+		sport->hw_flow_ctrl = true;
+
+		ret = devm_gpio_request(sport->dev,
+					sport->cts_gpio, "CTS");
+		if (ret) {
+			dev_err(&pdev->dev,
+				"error requesting CTS GPIO\n");
+			goto err;
+		}
+
+		ret = gpio_direction_input(sport->cts_gpio);
+		if (ret) {
+			dev_err(&pdev->dev, "error setting CTS GPIO\n");
+			goto err;
+		}
+	}
+
+	/* published before uart_add_one_port(): the console callbacks look
+	 * the port up through this table
+	 */
+	pic32_sports[uart_idx] = sport;
+	port = &sport->port;
+	memset(port, 0, sizeof(*port));
+	port->iotype	= UPIO_MEM;
+	port->mapbase	= res_mem->start;
+	port->ops	= &pic32_uart_ops;
+	port->flags	= UPF_BOOT_AUTOCONF;
+	port->dev	= &pdev->dev;
+	port->fifosize	= PIC32_UART_TX_FIFO_DEPTH;
+	port->uartclk	= clk_get_rate(sport->clk);
+	port->line	= uart_idx;
+
+	ret = uart_add_one_port(&pic32_uart_driver, port);
+	if (ret) {
+		port->membase = NULL;
+		/* sport is freed by devres once probe fails */
+		pic32_sports[uart_idx] = NULL;
+		dev_err(port->dev, "%s: uart add port error!\n", __func__);
+		goto err;
+	}
+
+#ifdef CONFIG_SERIAL_PIC32_CONSOLE
+	if (is_pic32_console_port(port) &&
+	    (pic32_console.flags & CON_ENABLED)) {
+		/* The peripheral clock has been enabled by console_setup,
+		 * so disable it till the port is used.
+		 */
+		pic32_disable_clock(sport);
+	}
+#endif
+
+	platform_set_drvdata(pdev, port);
+
+	dev_info(&pdev->dev, "%s: uart(%d) driver initialized.\n",
+		 __func__, uart_idx);
+
+	return 0;
+err:
+	/* automatic unroll of sport and gpios */
+	return ret;
+}
+
+/* Platform remove: detach the port from the serial core, call
+ * pic32_disable_clock() and clear the driver data and the pic32_sports[]
+ * slot. Always returns 0.
+ */
+static int pic32_uart_remove(struct platform_device *pdev)
+{
+	struct uart_port *uport = platform_get_drvdata(pdev);
+	struct pic32_sport *sport = to_pic32_sport(uport);
+
+	uart_remove_one_port(&pic32_uart_driver, uport);
+	pic32_disable_clock(sport);
+
+	platform_set_drvdata(pdev, NULL);
+	pic32_sports[sport->idx] = NULL;
+
+	/* sport and gpios are device-managed and unrolled automatically */
+	return 0;
+}
+
+/* device-tree compatible strings handled by this driver */
+static const struct of_device_id pic32_serial_dt_ids[] = {
+	{ .compatible = "microchip,pic32mzda-uart" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, pic32_serial_dt_ids);
+
+/* platform driver glue: binds through the device-tree match table */
+static struct platform_driver pic32_uart_platform_driver = {
+	.driver		= {
+		.name	= PIC32_DEV_NAME,
+		.of_match_table	= of_match_ptr(pic32_serial_dt_ids),
+	},
+	.probe		= pic32_uart_probe,
+	.remove		= pic32_uart_remove,
+};
+
+/* Register the uart driver with the serial core, then the platform
+ * driver. If the platform driver cannot be registered the uart driver is
+ * unregistered again. Returns 0 or the first error encountered.
+ */
+static int __init pic32_uart_init(void)
+{
+	int err = uart_register_driver(&pic32_uart_driver);
+
+	if (err) {
+		pr_err("failed to register %s:%d\n",
+		       pic32_uart_driver.driver_name, err);
+		return err;
+	}
+
+	err = platform_driver_register(&pic32_uart_platform_driver);
+	if (!err)
+		return 0;
+
+	pr_err("fail to register pic32 uart\n");
+	uart_unregister_driver(&pic32_uart_driver);
+
+	return err;
+}
+arch_initcall(pic32_uart_init);
+
+/* Module exit: unregister the console (when built in), then the platform
+ * driver, then the uart driver - the reverse of the registration order in
+ * pic32_uart_init().
+ */
+static void __exit pic32_uart_exit(void)
+{
+#ifdef CONFIG_SERIAL_PIC32_CONSOLE
+	unregister_console(&pic32_console);
+#endif
+	platform_driver_unregister(&pic32_uart_platform_driver);
+	uart_unregister_driver(&pic32_uart_driver);
+}
+module_exit(pic32_uart_exit);
+
+MODULE_AUTHOR("Sorin-Andrei Pistirica <andrei.pistirica@microchip.com>");
+MODULE_DESCRIPTION("Microchip PIC32 integrated serial port driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tty/serial/pic32_uart.h b/drivers/tty/serial/pic32_uart.h
new file mode 100644
index 0000000..ec379da
--- /dev/null
+++ b/drivers/tty/serial/pic32_uart.h
@@ -0,0 +1,126 @@
+/*
+ * PIC32 Integrated Serial Driver.
+ *
+ * Copyright (C) 2015 Microchip Technology, Inc.
+ *
+ * Authors:
+ *   Sorin-Andrei Pistirica <andrei.pistirica@microchip.com>
+ *
+ * Licensed under GPLv2 or later.
+ */
+#ifndef __DT_PIC32_UART_H__
+#define __DT_PIC32_UART_H__
+
+#define PIC32_UART_DFLT_BRATE		(9600)
+#define PIC32_UART_TX_FIFO_DEPTH	(8)
+#define PIC32_UART_RX_FIFO_DEPTH	(8)
+
+#define PIC32_UART_MODE		0x00
+#define PIC32_UART_STA		0x10
+#define PIC32_UART_TX		0x20
+#define PIC32_UART_RX		0x30
+#define PIC32_UART_BRG		0x40
+
+/* struct pic32_console_opt - console line settings
+ * @baud: baud rate
+ * @parity: parity setting
+ * @bits: number of data bits
+ * @flow: flow control setting
+ *
+ * NOTE(review): the value encoding presumably follows
+ * uart_parse_options() ('n'/'o'/'e' parity, 'r' flow) - confirm where
+ * the fields are filled in.
+ */
+struct pic32_console_opt {
+	int baud;
+	int parity;
+	int bits;
+	int flow;
+};
+
+/* struct pic32_sport - pic32 serial port descriptor
+ * @port: uart port descriptor
+ * @opt: console options (baud, parity, bits, flow)
+ * @idx: port index
+ * @irq_fault: virtual fault interrupt number
+ * @irqflags_fault: flags related to fault irq
+ * @irq_fault_name: irq fault name
+ * @irq_rx: virtual rx interrupt number
+ * @irqflags_rx: flags related to rx irq
+ * @irq_rx_name: irq rx name
+ * @irq_tx: virtual tx interrupt number
+ * @irqflags_tx: flags related to tx irq
+ * @irq_tx_name: irq tx name
+ * @enable_tx_irq: tx interrupt state, accessed through tx_irq_enabled()
+ * @hw_flow_ctrl: true when hardware flow control may be used
+ * @cts_gpio: clear to send gpio
+ * @ref_clk: presumably a reference count for @clk - TODO confirm against
+ *           pic32_enable_clock()/pic32_disable_clock()
+ * @clk: uart clock
+ * @dev: device descriptor
+ **/
+struct pic32_sport {
+	struct uart_port port;
+	struct pic32_console_opt opt;
+	int idx;
+
+	int irq_fault;
+	int irqflags_fault;
+	const char *irq_fault_name;
+	int irq_rx;
+	int irqflags_rx;
+	const char *irq_rx_name;
+	int irq_tx;
+	int irqflags_tx;
+	const char *irq_tx_name;
+	u8 enable_tx_irq;
+
+	bool hw_flow_ctrl;
+	int cts_gpio;
+
+	int ref_clk;
+	struct clk *clk;
+
+	struct device *dev;
+};
+/* Accessors for struct pic32_sport. The macro argument is parenthesized
+ * so that any pointer expression (not just a plain identifier) expands
+ * correctly; tx_irq_enabled() stays usable as an lvalue.
+ */
+#define to_pic32_sport(c) container_of(c, struct pic32_sport, port)
+#define pic32_get_port(sport) (&(sport)->port)
+#define pic32_get_opt(sport) (&(sport)->opt)
+#define tx_irq_enabled(sport) ((sport)->enable_tx_irq)
+
+/* write val to the uart register at byte offset reg from the port's
+ * mapped base, using the raw accessor
+ */
+static inline void pic32_uart_writel(struct pic32_sport *sport,
+					u32 reg, u32 val)
+{
+	__raw_writel(val, sport->port.membase + reg);
+}
+
+/* read the uart register at byte offset reg from the port's mapped base,
+ * using the raw accessor
+ */
+static inline u32 pic32_uart_readl(struct pic32_sport *sport, u32 reg)
+{
+	return __raw_readl(sport->port.membase + reg);
+}
+
+/* pic32 uart mode register bits */
+#define PIC32_UART_MODE_ON        BIT(15)
+#define PIC32_UART_MODE_FRZ       BIT(14)
+#define PIC32_UART_MODE_SIDL      BIT(13)
+#define PIC32_UART_MODE_IREN      BIT(12)
+#define PIC32_UART_MODE_RTSMD     BIT(11)
+#define PIC32_UART_MODE_RESV1     BIT(10)
+#define PIC32_UART_MODE_UEN1      BIT(9)
+#define PIC32_UART_MODE_UEN0      BIT(8)
+#define PIC32_UART_MODE_WAKE      BIT(7)
+#define PIC32_UART_MODE_LPBK      BIT(6)
+#define PIC32_UART_MODE_ABAUD     BIT(5)
+#define PIC32_UART_MODE_RXINV     BIT(4)
+#define PIC32_UART_MODE_BRGH      BIT(3)
+#define PIC32_UART_MODE_PDSEL1    BIT(2)
+#define PIC32_UART_MODE_PDSEL0    BIT(1)
+#define PIC32_UART_MODE_STSEL     BIT(0)
+
+/* pic32 uart status register bits */
+#define PIC32_UART_STA_UTXISEL1   BIT(15)
+#define PIC32_UART_STA_UTXISEL0   BIT(14)
+#define PIC32_UART_STA_UTXINV     BIT(13)
+#define PIC32_UART_STA_URXEN      BIT(12)
+#define PIC32_UART_STA_UTXBRK     BIT(11)
+#define PIC32_UART_STA_UTXEN      BIT(10)
+#define PIC32_UART_STA_UTXBF      BIT(9)
+#define PIC32_UART_STA_TRMT       BIT(8)
+#define PIC32_UART_STA_URXISEL1   BIT(7)
+#define PIC32_UART_STA_URXISEL0   BIT(6)
+#define PIC32_UART_STA_ADDEN      BIT(5)
+#define PIC32_UART_STA_RIDLE      BIT(4)
+#define PIC32_UART_STA_PERR       BIT(3)
+#define PIC32_UART_STA_FERR       BIT(2)
+#define PIC32_UART_STA_OERR       BIT(1)
+#define PIC32_UART_STA_URXDA      BIT(0)
+
+#endif /* __DT_PIC32_UART_H__ */
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 04dcedf..0449235 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -1245,11 +1245,6 @@
 #define TMIO_OHCI_DRIVER	ohci_hcd_tmio_driver
 #endif
 
-#ifdef CONFIG_MACH_JZ4740
-#include "ohci-jz4740.c"
-#define PLATFORM_DRIVER	ohci_hcd_jz4740_driver
-#endif
-
 #ifdef CONFIG_TILE_USB
 #include "ohci-tilegx.c"
 #define PLATFORM_DRIVER		ohci_hcd_tilegx_driver
diff --git a/drivers/usb/host/ohci-jz4740.c b/drivers/usb/host/ohci-jz4740.c
deleted file mode 100644
index 4db78f1..0000000
--- a/drivers/usb/host/ohci-jz4740.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under  the terms of the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/regulator/consumer.h>
-
-struct jz4740_ohci_hcd {
-	struct ohci_hcd ohci_hcd;
-
-	struct regulator *vbus;
-	bool vbus_enabled;
-	struct clk *clk;
-};
-
-static inline struct jz4740_ohci_hcd *hcd_to_jz4740_hcd(struct usb_hcd *hcd)
-{
-	return (struct jz4740_ohci_hcd *)(hcd->hcd_priv);
-}
-
-static inline struct usb_hcd *jz4740_hcd_to_hcd(struct jz4740_ohci_hcd *jz4740_ohci)
-{
-	return container_of((void *)jz4740_ohci, struct usb_hcd, hcd_priv);
-}
-
-static int ohci_jz4740_start(struct usb_hcd *hcd)
-{
-	struct ohci_hcd *ohci = hcd_to_ohci(hcd);
-	int	ret;
-
-	ret = ohci_init(ohci);
-	if (ret < 0)
-		return ret;
-
-	ohci->num_ports = 1;
-
-	ret = ohci_run(ohci);
-	if (ret < 0) {
-		dev_err(hcd->self.controller, "Can not start %s",
-			hcd->self.bus_name);
-		ohci_stop(hcd);
-		return ret;
-	}
-	return 0;
-}
-
-static int ohci_jz4740_set_vbus_power(struct jz4740_ohci_hcd *jz4740_ohci,
-	bool enabled)
-{
-	int ret = 0;
-
-	if (!jz4740_ohci->vbus)
-		return 0;
-
-	if (enabled && !jz4740_ohci->vbus_enabled) {
-		ret = regulator_enable(jz4740_ohci->vbus);
-		if (ret)
-			dev_err(jz4740_hcd_to_hcd(jz4740_ohci)->self.controller,
-				"Could not power vbus\n");
-	} else if (!enabled && jz4740_ohci->vbus_enabled) {
-		ret = regulator_disable(jz4740_ohci->vbus);
-	}
-
-	if (ret == 0)
-		jz4740_ohci->vbus_enabled = enabled;
-
-	return ret;
-}
-
-static int ohci_jz4740_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
-	u16 wIndex, char *buf, u16 wLength)
-{
-	struct jz4740_ohci_hcd *jz4740_ohci = hcd_to_jz4740_hcd(hcd);
-	int ret = 0;
-
-	switch (typeReq) {
-	case SetPortFeature:
-		if (wValue == USB_PORT_FEAT_POWER)
-			ret = ohci_jz4740_set_vbus_power(jz4740_ohci, true);
-		break;
-	case ClearPortFeature:
-		if (wValue == USB_PORT_FEAT_POWER)
-			ret = ohci_jz4740_set_vbus_power(jz4740_ohci, false);
-		break;
-	}
-
-	if (ret)
-		return ret;
-
-	return ohci_hub_control(hcd, typeReq, wValue, wIndex, buf, wLength);
-}
-
-
-static const struct hc_driver ohci_jz4740_hc_driver = {
-	.description =		hcd_name,
-	.product_desc =		"JZ4740 OHCI",
-	.hcd_priv_size =	sizeof(struct jz4740_ohci_hcd),
-
-	/*
-	 * generic hardware linkage
-	 */
-	.irq =			ohci_irq,
-	.flags =		HCD_USB11 | HCD_MEMORY,
-
-	/*
-	 * basic lifecycle operations
-	 */
-	.start =		ohci_jz4740_start,
-	.stop =			ohci_stop,
-	.shutdown =		ohci_shutdown,
-
-	/*
-	 * managing i/o requests and associated device resources
-	 */
-	.urb_enqueue =		ohci_urb_enqueue,
-	.urb_dequeue =		ohci_urb_dequeue,
-	.endpoint_disable =	ohci_endpoint_disable,
-
-	/*
-	 * scheduling support
-	 */
-	.get_frame_number =	ohci_get_frame,
-
-	/*
-	 * root hub support
-	 */
-	.hub_status_data =	ohci_hub_status_data,
-	.hub_control =		ohci_jz4740_hub_control,
-#ifdef	CONFIG_PM
-	.bus_suspend =		ohci_bus_suspend,
-	.bus_resume =		ohci_bus_resume,
-#endif
-	.start_port_reset =	ohci_start_port_reset,
-};
-
-
-static int jz4740_ohci_probe(struct platform_device *pdev)
-{
-	int ret;
-	struct usb_hcd *hcd;
-	struct jz4740_ohci_hcd *jz4740_ohci;
-	struct resource *res;
-	int irq;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "Failed to get platform irq\n");
-		return irq;
-	}
-
-	hcd = usb_create_hcd(&ohci_jz4740_hc_driver, &pdev->dev, "jz4740");
-	if (!hcd) {
-		dev_err(&pdev->dev, "Failed to create hcd.\n");
-		return -ENOMEM;
-	}
-
-	jz4740_ohci = hcd_to_jz4740_hcd(hcd);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hcd->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(hcd->regs)) {
-		ret = PTR_ERR(hcd->regs);
-		goto err_free;
-	}
-	hcd->rsrc_start = res->start;
-	hcd->rsrc_len = resource_size(res);
-
-	jz4740_ohci->clk = devm_clk_get(&pdev->dev, "uhc");
-	if (IS_ERR(jz4740_ohci->clk)) {
-		ret = PTR_ERR(jz4740_ohci->clk);
-		dev_err(&pdev->dev, "Failed to get clock: %d\n", ret);
-		goto err_free;
-	}
-
-	jz4740_ohci->vbus = devm_regulator_get(&pdev->dev, "vbus");
-	if (IS_ERR(jz4740_ohci->vbus))
-		jz4740_ohci->vbus = NULL;
-
-
-	clk_set_rate(jz4740_ohci->clk, 48000000);
-	clk_enable(jz4740_ohci->clk);
-	if (jz4740_ohci->vbus)
-		ohci_jz4740_set_vbus_power(jz4740_ohci, true);
-
-	platform_set_drvdata(pdev, hcd);
-
-	ohci_hcd_init(hcd_to_ohci(hcd));
-
-	ret = usb_add_hcd(hcd, irq, 0);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to add hcd: %d\n", ret);
-		goto err_disable;
-	}
-	device_wakeup_enable(hcd->self.controller);
-
-	return 0;
-
-err_disable:
-	if (jz4740_ohci->vbus)
-		regulator_disable(jz4740_ohci->vbus);
-	clk_disable(jz4740_ohci->clk);
-
-err_free:
-	usb_put_hcd(hcd);
-
-	return ret;
-}
-
-static int jz4740_ohci_remove(struct platform_device *pdev)
-{
-	struct usb_hcd *hcd = platform_get_drvdata(pdev);
-	struct jz4740_ohci_hcd *jz4740_ohci = hcd_to_jz4740_hcd(hcd);
-
-	usb_remove_hcd(hcd);
-
-	if (jz4740_ohci->vbus)
-		regulator_disable(jz4740_ohci->vbus);
-
-	clk_disable(jz4740_ohci->clk);
-
-	usb_put_hcd(hcd);
-
-	return 0;
-}
-
-static struct platform_driver ohci_hcd_jz4740_driver = {
-	.probe = jz4740_ohci_probe,
-	.remove = jz4740_ohci_remove,
-	.driver = {
-		.name = "jz4740-ohci",
-	},
-};
-
-MODULE_ALIAS("platform:jz4740-ohci");
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 75b24e9..15a6582 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -407,7 +407,7 @@
 
 	mutex_lock(&iommu->lock);
 	list_for_each_entry(domain, &iommu->domain_list, next)
-		bitmap &= domain->domain->ops->pgsize_bitmap;
+		bitmap &= domain->domain->pgsize_bitmap;
 	mutex_unlock(&iommu->lock);
 
 	/*
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index fb94765..9c41431 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1475,6 +1475,32 @@
 	help
 	  Hardware driver for the Mediatek/Ralink MT7621/8 SoC Watchdog Timer.
 
+config PIC32_WDT
+	tristate "Microchip PIC32 hardware watchdog"
+	select WATCHDOG_CORE
+	depends on MACH_PIC32
+	help
+	  Watchdog driver for the built in watchdog hardware in a PIC32.
+
+	  Configuration bits must be set appropriately for the watchdog to be
+	  controlled by this driver.
+
+	  To compile this driver as a loadable module, choose M here.
+	  The module will be called pic32-wdt.
+
+config PIC32_DMT
+	tristate "Microchip PIC32 Deadman Timer"
+	select WATCHDOG_CORE
+	depends on MACH_PIC32
+	help
+	  Watchdog driver for PIC32 instruction fetch counting timer. This specific
+	  timer is typically used in mission critical and safety critical
+	  applications, where any single failure of the software functionality
+	  and sequencing must be detected.
+
+	  To compile this driver as a loadable module, choose M here.
+	  The module will be called pic32-dmt.
+
 # PARISC Architecture
 
 # POWERPC Architecture
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index feb6270..9bde095 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -157,6 +157,8 @@
 obj-$(CONFIG_RALINK_WDT) += rt2880_wdt.o
 obj-$(CONFIG_IMGPDC_WDT) += imgpdc_wdt.o
 obj-$(CONFIG_MT7621_WDT) += mt7621_wdt.o
+obj-$(CONFIG_PIC32_WDT) += pic32-wdt.o
+obj-$(CONFIG_PIC32_DMT) += pic32-dmt.o
 
 # PARISC Architecture
 
diff --git a/drivers/watchdog/pic32-dmt.c b/drivers/watchdog/pic32-dmt.c
new file mode 100644
index 0000000..962f58c
--- /dev/null
+++ b/drivers/watchdog/pic32-dmt.c
@@ -0,0 +1,257 @@
+/*
+ * PIC32 deadman timer driver
+ *
+ * Purna Chandra Mandal <purna.mandal@microchip.com>
+ * Copyright (c) 2016, Microchip Technology Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/watchdog.h>
+
+#include <asm/mach-pic32/pic32.h>
+
+/* Deadman Timer Regs */
+#define DMTCON_REG	0x00
+#define DMTPRECLR_REG	0x10
+#define DMTCLR_REG	0x20
+#define DMTSTAT_REG	0x30
+#define DMTCNT_REG	0x40
+#define DMTPSCNT_REG	0x60
+#define DMTPSINTV_REG	0x70
+
+/* Deadman Timer Regs fields */
+#define DMT_ON			BIT(15)
+#define DMT_STEP1_KEY		BIT(6)
+#define DMT_STEP2_KEY		BIT(3)
+#define DMTSTAT_WINOPN		BIT(0)
+#define DMTSTAT_EVENT		BIT(5)
+#define DMTSTAT_BAD2		BIT(6)
+#define DMTSTAT_BAD1		BIT(7)
+
+/* Reset Control Register fields for watchdog */
+#define RESETCON_DMT_TIMEOUT	BIT(5)
+
+struct pic32_dmt {
+	void __iomem	*regs;
+	struct clk	*clk;
+};
+
+static inline void dmt_enable(struct pic32_dmt *dmt)
+{
+	writel(DMT_ON, PIC32_SET(dmt->regs + DMTCON_REG));
+}
+
+static inline void dmt_disable(struct pic32_dmt *dmt)
+{
+	writel(DMT_ON, PIC32_CLR(dmt->regs + DMTCON_REG));
+	/*
+	 * Cannot touch registers in the CPU cycle following clearing the
+	 * ON bit.
+	 */
+	nop();
+}
+
+/* Return -EAGAIN when DMTSTAT has BAD1, BAD2 or EVENT set, else 0. */
+static inline int dmt_bad_status(struct pic32_dmt *dmt)
+{
+	const u32 fault_mask = DMTSTAT_BAD1 | DMTSTAT_BAD2 | DMTSTAT_EVENT;
+	u32 status = readl(dmt->regs + DMTSTAT_REG);
+
+	if (!(status & fault_mask))
+		return 0;
+
+	return -EAGAIN;
+}
+
+/* Clear sequence: key1, bounded poll for the open window, key2, status. */
+static inline int dmt_keepalive(struct pic32_dmt *dmt)
+{
+	u32 polls;
+
+	/* step 1: pre-clear key, shifted left by 8, into DMTPRECLR */
+	writel(DMT_STEP1_KEY << 8, dmt->regs + DMTPRECLR_REG);
+
+	/* up to 499 reads of WINOPN; continues even if it never sets */
+	for (polls = 499; polls; polls--) {
+		u32 stat = readl(dmt->regs + DMTSTAT_REG);
+
+		if (stat & DMTSTAT_WINOPN)
+			break;
+	}
+
+	/* step 2: clear key into DMTCLR */
+	writel(DMT_STEP2_KEY, dmt->regs + DMTCLR_REG);
+
+	return dmt_bad_status(dmt);
+}
+
+/* DMTPSCNT divided by the clock rate; 0 if the rate reads as zero. */
+static inline u32 pic32_dmt_get_timeout_secs(struct pic32_dmt *dmt)
+{
+	unsigned long hz = clk_get_rate(dmt->clk);
+
+	if (!hz)
+		return 0;
+
+	return readl(dmt->regs + DMTPSCNT_REG) / hz;
+}
+
+/* DMT-timeout flag from the reset register; 0 if mapping fails. */
+static inline u32 pic32_dmt_bootstatus(struct pic32_dmt *dmt)
+{
+	void __iomem *rcon = ioremap(PIC32_BASE_RESET, 0x10);
+	u32 flag;
+
+	if (!rcon)
+		return 0;
+
+	flag = readl(rcon) & RESETCON_DMT_TIMEOUT;
+	/* write the flag bit through the PIC32_CLR() address of the register */
+	writel(RESETCON_DMT_TIMEOUT, PIC32_CLR(rcon));
+	iounmap(rcon);
+
+	return flag;
+}
+
+static int pic32_dmt_start(struct watchdog_device *wdd)
+{
+	struct pic32_dmt *dmt = watchdog_get_drvdata(wdd);
+
+	dmt_enable(dmt);
+	return dmt_keepalive(dmt);
+}
+
+static int pic32_dmt_stop(struct watchdog_device *wdd)
+{
+	struct pic32_dmt *dmt = watchdog_get_drvdata(wdd);
+
+	dmt_disable(dmt);
+
+	return 0;
+}
+
+static int pic32_dmt_ping(struct watchdog_device *wdd)
+{
+	struct pic32_dmt *dmt = watchdog_get_drvdata(wdd);
+
+	return dmt_keepalive(dmt);
+}
+
+static const struct watchdog_ops pic32_dmt_fops = {
+	.owner		= THIS_MODULE,
+	.start		= pic32_dmt_start,
+	.stop		= pic32_dmt_stop,
+	.ping		= pic32_dmt_ping,
+};
+
+static const struct watchdog_info pic32_dmt_ident = {
+	.options	= WDIOF_KEEPALIVEPING |
+			  WDIOF_MAGICCLOSE,
+	.identity	= "PIC32 Deadman Timer",
+};
+
+static struct watchdog_device pic32_dmt_wdd = {
+	.info		= &pic32_dmt_ident,
+	.ops		= &pic32_dmt_fops,
+};
+
+/* Probe: map regs, enable clk, read timeout, register the watchdog. */
+static int pic32_dmt_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct pic32_dmt *dmt;
+	struct resource *mem;
+	struct watchdog_device *wdd = &pic32_dmt_wdd;
+
+	/* devm_kzalloc() signals failure with NULL, never an ERR_PTR */
+	dmt = devm_kzalloc(&pdev->dev, sizeof(*dmt), GFP_KERNEL);
+	if (!dmt)
+		return -ENOMEM;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmt->regs = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(dmt->regs))
+		return PTR_ERR(dmt->regs);
+
+	dmt->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dmt->clk)) {
+		dev_err(&pdev->dev, "clk not found\n");
+		return PTR_ERR(dmt->clk);
+	}
+
+	ret = clk_prepare_enable(dmt->clk);
+	if (ret)
+		return ret;
+
+	wdd->timeout = pic32_dmt_get_timeout_secs(dmt);
+	if (!wdd->timeout) {
+		dev_err(&pdev->dev,
+			"failed to read watchdog register timeout\n");
+		ret = -EINVAL;
+		goto out_disable_clk;
+	}
+
+	dev_info(&pdev->dev, "timeout %d\n", wdd->timeout);
+	wdd->bootstatus = pic32_dmt_bootstatus(dmt) ? WDIOF_CARDRESET : 0;
+	watchdog_set_nowayout(wdd, WATCHDOG_NOWAYOUT);
+	watchdog_set_drvdata(wdd, dmt);
+
+	ret = watchdog_register_device(wdd);
+	if (ret) {
+		dev_err(&pdev->dev, "watchdog register failed, err %d\n", ret);
+		goto out_disable_clk;
+	}
+
+	platform_set_drvdata(pdev, wdd);
+	return 0;
+
+out_disable_clk:
+	clk_disable_unprepare(dmt->clk);
+	return ret;
+}
+
+/* Unbind: unregister the watchdog, then disable and unprepare the clock. */
+static int pic32_dmt_remove(struct platform_device *pdev)
+{
+	struct watchdog_device *wdog = platform_get_drvdata(pdev);
+	struct pic32_dmt *priv = watchdog_get_drvdata(wdog);
+
+	watchdog_unregister_device(wdog);
+	clk_disable_unprepare(priv->clk);
+	return 0;
+}
+
+static const struct of_device_id pic32_dmt_of_ids[] = {
+	{ .compatible = "microchip,pic32mzda-dmt",},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, pic32_dmt_of_ids);
+
+static struct platform_driver pic32_dmt_driver = {
+	.probe		= pic32_dmt_probe,
+	.remove		= pic32_dmt_remove,
+	.driver		= {
+		.name		= "pic32-dmt",
+		.owner		= THIS_MODULE,
+		.of_match_table = of_match_ptr(pic32_dmt_of_ids),
+	}
+};
+
+module_platform_driver(pic32_dmt_driver);
+
+MODULE_AUTHOR("Purna Chandra Mandal <purna.mandal@microchip.com>");
+MODULE_DESCRIPTION("Microchip PIC32 DMT Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/pic32-wdt.c b/drivers/watchdog/pic32-wdt.c
new file mode 100644
index 0000000..6047aa8
--- /dev/null
+++ b/drivers/watchdog/pic32-wdt.c
@@ -0,0 +1,263 @@
+/*
+ * PIC32 watchdog driver
+ *
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (c) 2016, Microchip Technology Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/watchdog.h>
+
+#include <asm/mach-pic32/pic32.h>
+
+/* Watchdog Timer Registers */
+#define WDTCON_REG		0x00
+
+/* Watchdog Timer Control Register fields */
+#define WDTCON_WIN_EN		BIT(0)
+#define WDTCON_RMCS_MASK	0x0003
+#define WDTCON_RMCS_SHIFT	0x0006
+#define WDTCON_RMPS_MASK	0x001F
+#define WDTCON_RMPS_SHIFT	0x0008
+#define WDTCON_ON		BIT(15)
+#define WDTCON_CLR_KEY		0x5743
+
+/* Reset Control Register fields for watchdog */
+#define RESETCON_TIMEOUT_IDLE	BIT(2)
+#define RESETCON_TIMEOUT_SLEEP	BIT(3)
+#define RESETCON_WDT_TIMEOUT	BIT(4)
+
+struct pic32_wdt {
+	void __iomem	*regs;
+	void __iomem	*rst_base;
+	struct clk	*clk;
+};
+
+static inline bool pic32_wdt_is_win_enabled(struct pic32_wdt *wdt)
+{
+	return !!(readl(wdt->regs + WDTCON_REG) & WDTCON_WIN_EN);
+}
+
+static inline u32 pic32_wdt_get_post_scaler(struct pic32_wdt *wdt)
+{
+	u32 v = readl(wdt->regs + WDTCON_REG);
+
+	return (v >> WDTCON_RMPS_SHIFT) & WDTCON_RMPS_MASK;
+}
+
+static inline u32 pic32_wdt_get_clk_id(struct pic32_wdt *wdt)
+{
+	u32 v = readl(wdt->regs + WDTCON_REG);
+
+	return (v >> WDTCON_RMCS_SHIFT) & WDTCON_RMCS_MASK;
+}
+
+/* Read the reset register, write the WDT flag via PIC32_CLR, return it. */
+static int pic32_wdt_bootstatus(struct pic32_wdt *wdt)
+{
+	u32 rcon = readl(wdt->rst_base);
+
+	writel(RESETCON_WDT_TIMEOUT, PIC32_CLR(wdt->rst_base));
+	return rcon & RESETCON_WDT_TIMEOUT;
+}
+
+/* Timeout in seconds from clk rate / 32 and postscaler; 0 if rate < 32. */
+static u32 pic32_wdt_get_timeout_secs(struct pic32_wdt *wdt, struct device *dev)
+{
+	unsigned long rate;
+	u32 period, ps, terminal;
+
+	rate = clk_get_rate(wdt->clk);
+	dev_dbg(dev, "wdt: clk_id %u, clk_rate %lu (prescale)\n",
+		pic32_wdt_get_clk_id(wdt), rate);
+
+	/* default, prescaler of 32 (i.e. div-by-32) is implicit. */
+	rate >>= 5;
+	if (!rate)
+		return 0;
+
+	/* calculate terminal count from postscaler. */
+	ps = pic32_wdt_get_post_scaler(wdt);
+	terminal = BIT(ps);
+
+	/* find time taken (in secs) to reach terminal count */
+	period = terminal / rate;
+	dev_dbg(dev,
+		"wdt: clk_rate %lu (postscale) / terminal %u, timeout %usec\n",
+		rate, terminal, period);
+
+	return period;
+}
+
+static void pic32_wdt_keepalive(struct pic32_wdt *wdt)
+{
+	/* write key through single half-word */
+	writew(WDTCON_CLR_KEY, wdt->regs + WDTCON_REG + 2);
+}
+
+static int pic32_wdt_start(struct watchdog_device *wdd)
+{
+	struct pic32_wdt *wdt = watchdog_get_drvdata(wdd);
+
+	writel(WDTCON_ON, PIC32_SET(wdt->regs + WDTCON_REG));
+	pic32_wdt_keepalive(wdt);
+
+	return 0;
+}
+
+/* watchdog_ops.stop: write WDTCON_ON to the CLR address; returns 0. */
+static int pic32_wdt_stop(struct watchdog_device *wdd)
+{
+	struct pic32_wdt *priv = watchdog_get_drvdata(wdd);
+	void __iomem *wdtcon = priv->regs + WDTCON_REG;
+
+	writel(WDTCON_ON, PIC32_CLR(wdtcon));
+	/*
+	 * The registers must be left alone for one CPU cycle after the ON
+	 * bit is cleared, hence the nop().
+	 */
+	nop();
+	return 0;
+}
+
+static int pic32_wdt_ping(struct watchdog_device *wdd)
+{
+	struct pic32_wdt *wdt = watchdog_get_drvdata(wdd);
+
+	pic32_wdt_keepalive(wdt);
+
+	return 0;
+}
+
+static const struct watchdog_ops pic32_wdt_fops = {
+	.owner		= THIS_MODULE,
+	.start		= pic32_wdt_start,
+	.stop		= pic32_wdt_stop,
+	.ping		= pic32_wdt_ping,
+};
+
+static const struct watchdog_info pic32_wdt_ident = {
+	.options = WDIOF_KEEPALIVEPING |
+			WDIOF_MAGICCLOSE | WDIOF_CARDRESET,
+	.identity = "PIC32 Watchdog",
+};
+
+static struct watchdog_device pic32_wdd = {
+	.info		= &pic32_wdt_ident,
+	.ops		= &pic32_wdt_fops,
+};
+
+static const struct of_device_id pic32_wdt_dt_ids[] = {
+	{ .compatible = "microchip,pic32mzda-wdt", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, pic32_wdt_dt_ids);
+
+/* Probe: map regs and reset block, enable clk, register the watchdog. */
+static int pic32_wdt_drv_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct watchdog_device *wdd = &pic32_wdd;
+	struct pic32_wdt *wdt;
+	struct resource *mem;
+
+	/* devm_kzalloc() reports failure as NULL, not as an ERR_PTR */
+	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+	if (!wdt)
+		return -ENOMEM;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	wdt->regs = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(wdt->regs))
+		return PTR_ERR(wdt->regs);
+
+	/* unlike devm_ioremap_resource(), devm_ioremap() fails with NULL */
+	wdt->rst_base = devm_ioremap(&pdev->dev, PIC32_BASE_RESET, 0x10);
+	if (!wdt->rst_base)
+		return -ENOMEM;
+
+	wdt->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(wdt->clk)) {
+		dev_err(&pdev->dev, "clk not found\n");
+		return PTR_ERR(wdt->clk);
+	}
+
+	ret = clk_prepare_enable(wdt->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "clk enable failed\n");
+		return ret;
+	}
+
+	if (pic32_wdt_is_win_enabled(wdt)) {
+		dev_err(&pdev->dev, "windowed-clear mode is not supported.\n");
+		ret = -ENODEV;
+		goto out_disable_clk;
+	}
+
+	wdd->timeout = pic32_wdt_get_timeout_secs(wdt, &pdev->dev);
+	if (!wdd->timeout) {
+		dev_err(&pdev->dev,
+			"failed to read watchdog register timeout\n");
+		ret = -EINVAL;
+		goto out_disable_clk;
+	}
+
+	dev_info(&pdev->dev, "timeout %d\n", wdd->timeout);
+
+	wdd->bootstatus = pic32_wdt_bootstatus(wdt) ? WDIOF_CARDRESET : 0;
+	watchdog_set_nowayout(wdd, WATCHDOG_NOWAYOUT);
+	watchdog_set_drvdata(wdd, wdt);
+
+	ret = watchdog_register_device(wdd);
+	if (ret) {
+		dev_err(&pdev->dev, "watchdog register failed, err %d\n", ret);
+		goto out_disable_clk;
+	}
+
+	platform_set_drvdata(pdev, wdd);
+	return 0;
+
+out_disable_clk:
+	clk_disable_unprepare(wdt->clk);
+	return ret;
+}
+
+/* Unbind: unregister the watchdog, then disable and unprepare the clock. */
+static int pic32_wdt_drv_remove(struct platform_device *pdev)
+{
+	struct watchdog_device *wdog = platform_get_drvdata(pdev);
+	struct pic32_wdt *priv = watchdog_get_drvdata(wdog);
+
+	watchdog_unregister_device(wdog);
+	clk_disable_unprepare(priv->clk);
+	return 0;
+}
+
+static struct platform_driver pic32_wdt_driver = {
+	.probe		= pic32_wdt_drv_probe,
+	.remove		= pic32_wdt_drv_remove,
+	.driver		= {
+		.name		= "pic32-wdt",
+		.owner		= THIS_MODULE,
+		.of_match_table = of_match_ptr(pic32_wdt_dt_ids),
+	}
+};
+
+module_platform_driver(pic32_wdt_driver);
+
+MODULE_AUTHOR("Joshua Henderson <joshua.henderson@microchip.com>");
+MODULE_DESCRIPTION("Microchip PIC32 Watchdog Timer");
+MODULE_LICENSE("GPL");
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 3f9312591..71e8a56 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -360,7 +360,7 @@
 				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
 				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
 				KEY_USR_VIEW | KEY_USR_READ,
-				KEY_ALLOC_NOT_IN_QUOTA, NULL);
+				KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto failed_put_cred;
diff --git a/fs/exec.c b/fs/exec.c
index a98b21d4..e92419f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -850,15 +850,25 @@
 	if (ret)
 		return ret;
 
+	ret = deny_write_access(file);
+	if (ret)
+		return ret;
+
 	i_size = i_size_read(file_inode(file));
-	if (max_size > 0 && i_size > max_size)
-		return -EFBIG;
-	if (i_size <= 0)
-		return -EINVAL;
+	if (max_size > 0 && i_size > max_size) {
+		ret = -EFBIG;
+		goto out;
+	}
+	if (i_size <= 0) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	*buf = vmalloc(i_size);
-	if (!*buf)
-		return -ENOMEM;
+	if (!*buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	pos = 0;
 	while (pos < i_size) {
@@ -876,18 +886,21 @@
 
 	if (pos != i_size) {
 		ret = -EIO;
-		goto out;
+		goto out_free;
 	}
 
 	ret = security_kernel_post_read_file(file, *buf, i_size, id);
 	if (!ret)
 		*size = pos;
 
-out:
+out_free:
 	if (ret < 0) {
 		vfree(*buf);
 		*buf = NULL;
 	}
+
+out:
+	allow_write_access(file);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kernel_read_file);
diff --git a/fs/namei.c b/fs/namei.c
index 9d193d3..5375571 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3627,6 +3627,8 @@
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
 			error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+			if (!error)
+				ima_post_path_mknod(dentry);
 			break;
 		case S_IFCHR: case S_IFBLK:
 			error = vfs_mknod(path.dentry->d_inode,dentry,mode,
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 5ba22c6..c444285 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -201,7 +201,7 @@
 				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
 				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
 				KEY_USR_VIEW | KEY_USR_READ,
-				KEY_ALLOC_NOT_IN_QUOTA, NULL);
+				KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto failed_put_cred;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 8afe10c..8ab782d 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1071,7 +1071,7 @@
 	/* Do some basic Verification. */
 	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
 		(ehdr.e_type != ET_CORE) ||
-		!elf_check_arch(&ehdr) ||
+		!vmcore_elf32_check_arch(&ehdr) ||
 		ehdr.e_ident[EI_CLASS] != ELFCLASS32||
 		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
 		ehdr.e_version != EV_CURRENT ||
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index eed3bbe..002b81f 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -191,7 +191,7 @@
 #define readl_relaxed readl
 #endif
 
-#ifndef readq_relaxed
+#if defined(readq) && !defined(readq_relaxed)
 #define readq_relaxed readq
 #endif
 
@@ -207,7 +207,7 @@
 #define writel_relaxed writel
 #endif
 
-#ifndef writeq_relaxed
+#if defined(writeq) && !defined(writeq_relaxed)
 #define writeq_relaxed writeq
 #endif
 
diff --git a/include/asm-generic/seccomp.h b/include/asm-generic/seccomp.h
index c9ccafa..e74072d 100644
--- a/include/asm-generic/seccomp.h
+++ b/include/asm-generic/seccomp.h
@@ -29,4 +29,18 @@
 #define __NR_seccomp_sigreturn		__NR_rt_sigreturn
 #endif
 
+#ifdef CONFIG_COMPAT
+#ifndef get_compat_mode1_syscalls
+static inline const int *get_compat_mode1_syscalls(void)
+{
+	static const int mode1_syscalls_32[] = {
+		__NR_seccomp_read_32, __NR_seccomp_write_32,
+		__NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
+		0, /* null terminated */
+	};
+	return mode1_syscalls_32;
+}
+#endif
+#endif /* CONFIG_COMPAT */
+
 #endif /* _ASM_GENERIC_SECCOMP_H */
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 3d1a3af..a2508a8 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -17,21 +17,6 @@
 struct siginfo;
 void do_schedule_next_timer(struct siginfo *info);
 
-#ifndef HAVE_ARCH_COPY_SIGINFO
-
-#include <linux/string.h>
-
-static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
-{
-	if (from->si_code < 0)
-		memcpy(to, from, sizeof(*to));
-	else
-		/* _sigchld is currently the largest know union member */
-		memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
-}
-
-#endif
-
 extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
 
 #endif
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 25d0914..caedb74 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -49,11 +49,16 @@
 
 #define ARCH_TIMER_EVT_STREAM_FREQ	10000	/* 100us */
 
+struct arch_timer_kvm_info {
+	struct timecounter timecounter;
+	int virtual_irq;
+};
+
 #ifdef CONFIG_ARM_ARCH_TIMER
 
 extern u32 arch_timer_get_rate(void);
 extern u64 (*arch_timer_read_counter)(void);
-extern struct timecounter *arch_timer_get_timecounter(void);
+extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
 
 #else
 
@@ -67,11 +72,6 @@
 	return 0;
 }
 
-static inline struct timecounter *arch_timer_get_timecounter(void)
-{
-	return NULL;
-}
-
 #endif
 
 #endif
diff --git a/include/crypto/pkcs7.h b/include/crypto/pkcs7.h
index 441aff9..583f199 100644
--- a/include/crypto/pkcs7.h
+++ b/include/crypto/pkcs7.h
@@ -12,6 +12,7 @@
 #ifndef _CRYPTO_PKCS7_H
 #define _CRYPTO_PKCS7_H
 
+#include <linux/verification.h>
 #include <crypto/public_key.h>
 
 struct key;
@@ -26,14 +27,13 @@
 
 extern int pkcs7_get_content_data(const struct pkcs7_message *pkcs7,
 				  const void **_data, size_t *_datalen,
-				  bool want_wrapper);
+				  size_t *_headerlen);
 
 /*
  * pkcs7_trust.c
  */
 extern int pkcs7_validate_trust(struct pkcs7_message *pkcs7,
-				struct key *trust_keyring,
-				bool *_trusted);
+				struct key *trust_keyring);
 
 /*
  * pkcs7_verify.c
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index aa730ea..882ca0e1 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -15,20 +15,6 @@
 #define _LINUX_PUBLIC_KEY_H
 
 /*
- * The use to which an asymmetric key is being put.
- */
-enum key_being_used_for {
-	VERIFYING_MODULE_SIGNATURE,
-	VERIFYING_FIRMWARE_SIGNATURE,
-	VERIFYING_KEXEC_PE_SIGNATURE,
-	VERIFYING_KEY_SIGNATURE,
-	VERIFYING_KEY_SELF_SIGNATURE,
-	VERIFYING_UNSPECIFIED_SIGNATURE,
-	NR__KEY_BEING_USED_FOR
-};
-extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR];
-
-/*
  * Cryptographic data for the public-key subtype of the asymmetric key type.
  *
  * Note that this may include private part of the key as well as the public
@@ -41,12 +27,13 @@
 	const char *pkey_algo;
 };
 
-extern void public_key_destroy(void *payload);
+extern void public_key_free(struct public_key *key);
 
 /*
  * Public key cryptography signature data
  */
 struct public_key_signature {
+	struct asymmetric_key_id *auth_ids[2];
 	u8 *s;			/* Signature */
 	u32 s_size;		/* Number of bytes in signature */
 	u8 *digest;
@@ -55,17 +42,21 @@
 	const char *hash_algo;
 };
 
+extern void public_key_signature_free(struct public_key_signature *sig);
+
 extern struct asymmetric_key_subtype public_key_subtype;
+
 struct key;
+struct key_type;
+union key_payload;
+
+extern int restrict_link_by_signature(struct key *trust_keyring,
+				      const struct key_type *type,
+				      const union key_payload *payload);
+
 extern int verify_signature(const struct key *key,
 			    const struct public_key_signature *sig);
 
-struct asymmetric_key_id;
-extern struct key *x509_request_asymmetric_key(struct key *keyring,
-					       const struct asymmetric_key_id *id,
-					       const struct asymmetric_key_id *skid,
-					       bool partial);
-
 int public_key_verify_signature(const struct public_key *pkey,
 				const struct public_key_signature *sig);
 
diff --git a/include/dt-bindings/clock/ath79-clk.h b/include/dt-bindings/clock/ath79-clk.h
new file mode 100644
index 0000000..27359ad
--- /dev/null
+++ b/include/dt-bindings/clock/ath79-clk.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2014, 2016 Antony Pavlov <antonynpavlov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __DT_BINDINGS_ATH79_CLK_H
+#define __DT_BINDINGS_ATH79_CLK_H
+
+#define ATH79_CLK_CPU		0
+#define ATH79_CLK_DDR		1
+#define ATH79_CLK_AHB		2
+
+#define ATH79_CLK_END		3
+
+#endif /* __DT_BINDINGS_ATH79_CLK_H */
diff --git a/include/dt-bindings/clock/microchip,pic32-clock.h b/include/dt-bindings/clock/microchip,pic32-clock.h
new file mode 100644
index 0000000..184647a6
--- /dev/null
+++ b/include/dt-bindings/clock/microchip,pic32-clock.h
@@ -0,0 +1,42 @@
+/*
+ * Purna Chandra Mandal,<purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_MICROCHIP_PIC32_H_
+#define _DT_BINDINGS_CLK_MICROCHIP_PIC32_H_
+
+/* clock output indices */
+#define POSCCLK		0
+#define FRCCLK		1
+#define BFRCCLK		2
+#define LPRCCLK		3
+#define SOSCCLK		4
+#define FRCDIVCLK	5
+#define PLLCLK		6
+#define SCLK		7
+#define PB1CLK		8
+#define PB2CLK		9
+#define PB3CLK		10
+#define PB4CLK		11
+#define PB5CLK		12
+#define PB6CLK		13
+#define PB7CLK		14
+#define REF1CLK		15
+#define REF2CLK		16
+#define REF3CLK		17
+#define REF4CLK		18
+#define REF5CLK		19
+#define UPLLCLK		20
+#define MAXCLKS		21
+
+#endif	/* _DT_BINDINGS_CLK_MICROCHIP_PIC32_H_ */
diff --git a/include/dt-bindings/gpio/meson-gxbb-gpio.h b/include/dt-bindings/gpio/meson-gxbb-gpio.h
new file mode 100644
index 0000000..58654fd
--- /dev/null
+++ b/include/dt-bindings/gpio/meson-gxbb-gpio.h
@@ -0,0 +1,154 @@
+/*
+ * GPIO definitions for Amlogic Meson GXBB SoCs
+ *
+ * Copyright (C) 2016 Endless Mobile, Inc.
+ * Author: Carlo Caione <carlo@endlessm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DT_BINDINGS_MESON_GXBB_GPIO_H
+#define _DT_BINDINGS_MESON_GXBB_GPIO_H
+
+#define	GPIOAO_0	0
+#define	GPIOAO_1	1
+#define	GPIOAO_2	2
+#define	GPIOAO_3	3
+#define	GPIOAO_4	4
+#define	GPIOAO_5	5
+#define	GPIOAO_6	6
+#define	GPIOAO_7	7
+#define	GPIOAO_8	8
+#define	GPIOAO_9	9
+#define	GPIOAO_10	10
+#define	GPIOAO_11	11
+#define	GPIOAO_12	12
+#define	GPIOAO_13	13
+
+#define	GPIOZ_0		0
+#define	GPIOZ_1		1
+#define	GPIOZ_2		2
+#define	GPIOZ_3		3
+#define	GPIOZ_4		4
+#define	GPIOZ_5		5
+#define	GPIOZ_6		6
+#define	GPIOZ_7		7
+#define	GPIOZ_8		8
+#define	GPIOZ_9		9
+#define	GPIOZ_10	10
+#define	GPIOZ_11	11
+#define	GPIOZ_12	12
+#define	GPIOZ_13	13
+#define	GPIOZ_14	14
+#define	GPIOZ_15	15
+#define	GPIOH_0		16
+#define	GPIOH_1		17
+#define	GPIOH_2		18
+#define	GPIOH_3		19
+#define	BOOT_0		20
+#define	BOOT_1		21
+#define	BOOT_2		22
+#define	BOOT_3		23
+#define	BOOT_4		24
+#define	BOOT_5		25
+#define	BOOT_6		26
+#define	BOOT_7		27
+#define	BOOT_8		28
+#define	BOOT_9		29
+#define	BOOT_10		30
+#define	BOOT_11		31
+#define	BOOT_12		32
+#define	BOOT_13		33
+#define	BOOT_14		34
+#define	BOOT_15		35
+#define	BOOT_16		36
+#define	BOOT_17		37
+#define	CARD_0		38
+#define	CARD_1		39
+#define	CARD_2		40
+#define	CARD_3		41
+#define	CARD_4		42
+#define	CARD_5		43
+#define	CARD_6		44
+#define	GPIODV_0	45
+#define	GPIODV_1	46
+#define	GPIODV_2	47
+#define	GPIODV_3	48
+#define	GPIODV_4	49
+#define	GPIODV_5	50
+#define	GPIODV_6	51
+#define	GPIODV_7	52
+#define	GPIODV_8	53
+#define	GPIODV_9	54
+#define	GPIODV_10	55
+#define	GPIODV_11	56
+#define	GPIODV_12	57
+#define	GPIODV_13	58
+#define	GPIODV_14	59
+#define	GPIODV_15	60
+#define	GPIODV_16	61
+#define	GPIODV_17	62
+#define	GPIODV_18	63
+#define	GPIODV_19	64
+#define	GPIODV_20	65
+#define	GPIODV_21	66
+#define	GPIODV_22	67
+#define	GPIODV_23	68
+#define	GPIODV_24	69
+#define	GPIODV_25	70
+#define	GPIODV_26	71
+#define	GPIODV_27	72
+#define	GPIODV_28	73
+#define	GPIODV_29	74
+#define	GPIOY_0		75
+#define	GPIOY_1		76
+#define	GPIOY_2		77
+#define	GPIOY_3		78
+#define	GPIOY_4		79
+#define	GPIOY_5		80
+#define	GPIOY_6		81
+#define	GPIOY_7		82
+#define	GPIOY_8		83
+#define	GPIOY_9		84
+#define	GPIOY_10	85
+#define	GPIOY_11	86
+#define	GPIOY_12	87
+#define	GPIOY_13	88
+#define	GPIOY_14	89
+#define	GPIOY_15	90
+#define	GPIOY_16	91
+#define	GPIOX_0		92
+#define	GPIOX_1		93
+#define	GPIOX_2		94
+#define	GPIOX_3		95
+#define	GPIOX_4		96
+#define	GPIOX_5		97
+#define	GPIOX_6		98
+#define	GPIOX_7		99
+#define	GPIOX_8		100
+#define	GPIOX_9		101
+#define	GPIOX_10	102
+#define	GPIOX_11	103
+#define	GPIOX_12	104
+#define	GPIOX_13	105
+#define	GPIOX_14	106
+#define	GPIOX_15	107
+#define	GPIOX_16	108
+#define	GPIOX_17	109
+#define	GPIOX_18	110
+#define	GPIOX_19	111
+#define	GPIOX_20	112
+#define	GPIOX_21	113
+#define	GPIOX_22	114
+#define	GPIOCLK_0	115
+#define	GPIOCLK_1	116
+#define	GPIOCLK_2	117
+#define	GPIOCLK_3	118
+#define	GPIO_TEST_N	119
+
+#endif
diff --git a/include/keys/asymmetric-subtype.h b/include/keys/asymmetric-subtype.h
index 4915d40..2480469 100644
--- a/include/keys/asymmetric-subtype.h
+++ b/include/keys/asymmetric-subtype.h
@@ -32,7 +32,7 @@
 	void (*describe)(const struct key *key, struct seq_file *m);
 
 	/* Destroy a key of this subtype */
-	void (*destroy)(void *payload);
+	void (*destroy)(void *payload_crypto, void *payload_auth);
 
 	/* Verify the signature on a key of this subtype (optional) */
 	int (*verify_signature)(const struct key *key,
diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h
index 59c1df9..b382407 100644
--- a/include/keys/asymmetric-type.h
+++ b/include/keys/asymmetric-type.h
@@ -15,6 +15,7 @@
 #define _KEYS_ASYMMETRIC_TYPE_H
 
 #include <linux/key-type.h>
+#include <linux/verification.h>
 
 extern struct key_type key_type_asymmetric;
 
@@ -23,9 +24,10 @@
  * follows:
  */
 enum asymmetric_payload_bits {
-	asym_crypto,
-	asym_subtype,
-	asym_key_ids,
+	asym_crypto,		/* The data representing the key */
+	asym_subtype,		/* Pointer to an asymmetric_key_subtype struct */
+	asym_key_ids,		/* Pointer to an asymmetric_key_ids struct */
+	asym_auth		/* The key's authorisation (signature, parent key ID) */
 };
 
 /*
@@ -74,6 +76,11 @@
 	return key->payload.data[asym_key_ids];
 }
 
+extern struct key *find_asymmetric_key(struct key *keyring,
+				       const struct asymmetric_key_id *id_0,
+				       const struct asymmetric_key_id *id_1,
+				       bool partial);
+
 /*
  * The payload is at the discretion of the subtype.
  */
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 39fd38c..fbd4647 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -12,51 +12,40 @@
 #ifndef _KEYS_SYSTEM_KEYRING_H
 #define _KEYS_SYSTEM_KEYRING_H
 
+#include <linux/key.h>
+
 #ifdef CONFIG_SYSTEM_TRUSTED_KEYRING
 
-#include <linux/key.h>
-#include <crypto/public_key.h>
+extern int restrict_link_by_builtin_trusted(struct key *keyring,
+					    const struct key_type *type,
+					    const union key_payload *payload);
 
-extern struct key *system_trusted_keyring;
-static inline struct key *get_system_trusted_keyring(void)
-{
-	return system_trusted_keyring;
-}
 #else
-static inline struct key *get_system_trusted_keyring(void)
-{
-	return NULL;
-}
+#define restrict_link_by_builtin_trusted restrict_link_reject
 #endif
 
-#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
-extern int system_verify_data(const void *data, unsigned long len,
-			      const void *raw_pkcs7, size_t pkcs7_len,
-			      enum key_being_used_for usage);
+#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
+extern int restrict_link_by_builtin_and_secondary_trusted(
+	struct key *keyring,
+	const struct key_type *type,
+	const union key_payload *payload);
+#else
+#define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
 #endif
 
-#ifdef CONFIG_IMA_MOK_KEYRING
-extern struct key *ima_mok_keyring;
+#ifdef CONFIG_IMA_BLACKLIST_KEYRING
 extern struct key *ima_blacklist_keyring;
 
-static inline struct key *get_ima_mok_keyring(void)
-{
-	return ima_mok_keyring;
-}
 static inline struct key *get_ima_blacklist_keyring(void)
 {
 	return ima_blacklist_keyring;
 }
 #else
-static inline struct key *get_ima_mok_keyring(void)
-{
-	return NULL;
-}
 static inline struct key *get_ima_blacklist_keyring(void)
 {
 	return NULL;
 }
-#endif /* CONFIG_IMA_MOK_KEYRING */
+#endif /* CONFIG_IMA_BLACKLIST_KEYRING */
 
 
 #endif /* _KEYS_SYSTEM_KEYRING_H */
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 281caf8..be6037a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,6 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <kvm/iodev.h>
+#include <linux/irqchip/arm-gic-common.h>
 
 #define VGIC_NR_IRQS_LEGACY	256
 #define VGIC_NR_SGIS		16
@@ -353,15 +354,15 @@
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
 #define vgic_ready(k)		((k)->arch.vgic.ready)
 
-int vgic_v2_probe(struct device_node *vgic_node,
+int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
 		  const struct vgic_ops **ops,
 		  const struct vgic_params **params);
 #ifdef CONFIG_KVM_ARM_VGIC_V3
-int vgic_v3_probe(struct device_node *vgic_node,
+int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
 		  const struct vgic_ops **ops,
 		  const struct vgic_params **params);
 #else
-static inline int vgic_v3_probe(struct device_node *vgic_node,
+static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
 				const struct vgic_ops **ops,
 				const struct vgic_params **params)
 {
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 10fe2a2..27e9ec8 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -86,7 +86,7 @@
  * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2
  */
 struct pl08x_platform_data {
-	const struct pl08x_channel_data *slave_channels;
+	struct pl08x_channel_data *slave_channels;
 	unsigned int num_slave_channels;
 	struct pl08x_channel_data memcpy_channel;
 	int (*get_xfer_signal)(const struct pl08x_channel_data *);
diff --git a/include/linux/bcm47xx_sprom.h b/include/linux/bcm47xx_sprom.h
new file mode 100644
index 0000000..c06b47c
--- /dev/null
+++ b/include/linux/bcm47xx_sprom.h
@@ -0,0 +1,24 @@
+/*
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#ifndef __BCM47XX_SPROM_H
+#define __BCM47XX_SPROM_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+
+#ifdef CONFIG_BCM47XX_SPROM
+int bcm47xx_sprom_register_fallbacks(void);
+#else /* !CONFIG_BCM47XX_SPROM */
+static inline int bcm47xx_sprom_register_fallbacks(void)
+{
+	return -ENOTSUPP;	/* stub used when CONFIG_BCM47XX_SPROM is not set */
+}
+#endif /* CONFIG_BCM47XX_SPROM */
+
+#endif /* __BCM47XX_SPROM_H */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index defeaac..299e76b 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -227,6 +227,22 @@
 })
 #endif
 
+#ifndef bit_clear_unless
+/* Atomically clear @_clear in *ptr unless a @_test bit is set; true if cleared. */
+#define bit_clear_unless(ptr, _clear, _test)	\
+({								\
+	const typeof(*(ptr)) clear__ = (_clear), test__ = (_test);	\
+	typeof(*(ptr)) old__, new__;				\
+								\
+	do {							\
+		old__ = ACCESS_ONCE(*(ptr));			\
+		new__ = old__ & ~clear__;			\
+	} while (!(old__ & test__) &&				\
+		 cmpxchg(ptr, old__, new__) != old__);		\
+	!(old__ & test__);					\
+})
+#endif
+
 #ifndef find_last_bit
 /**
  * find_last_bit - find the last set bit in a memory region
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 3849fce..3873697 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -34,9 +34,13 @@
 
 /*
  * Architecture code can redefine this if there are any special checks
- * needed for 64-bit ELF vmcores. In case of 32-bit only architecture,
- * this can be set to zero.
+ * needed for 32-bit ELF or 64-bit ELF vmcores.  In case of 32-bit
+ * only architecture, vmcore_elf64_check_arch can be set to zero.
  */
+#ifndef vmcore_elf32_check_arch
+#define vmcore_elf32_check_arch(x) elf_check_arch(x)
+#endif
+
 #ifndef vmcore_elf64_check_arch
 #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
 #endif
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index fc48103..8443bbb 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -38,8 +38,8 @@
  * These implement the bulk of the relevant DMA mapping callbacks, but require
  * the arch code to take care of attributes and cache maintenance
  */
-struct page **iommu_dma_alloc(struct device *dev, size_t size,
-		gfp_t gfp, int prot, dma_addr_t *handle,
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+		struct dma_attrs *attrs, int prot, dma_addr_t *handle,
 		void (*flush_page)(struct device *, const void *, phys_addr_t));
 void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
 		dma_addr_t *handle);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 9ea9aba..71c1b215 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -514,7 +514,7 @@
 
 #ifndef arch_setup_dma_ops
 static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
-				      u64 size, struct iommu_ops *iommu,
+				      u64 size, const struct iommu_ops *iommu,
 				      bool coherent) { }
 #endif
 
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index 7145644..f2e538a 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -27,6 +27,7 @@
  * @regs:		memory mapped I/O space
  * @clk:		hclk clock
  * @dw:			struct dw_dma that is filed by dw_dma_probe()
+ * @pdata:		pointer to platform data
  */
 struct dw_dma_chip {
 	struct device	*dev;
@@ -34,10 +35,12 @@
 	void __iomem	*regs;
 	struct clk	*clk;
 	struct dw_dma	*dw;
+
+	const struct dw_dma_platform_data	*pdata;
 };
 
 /* Export to the platform drivers */
-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata);
+int dw_dma_probe(struct dw_dma_chip *chip);
 int dw_dma_remove(struct dw_dma_chip *chip);
 
 /* DMA API extensions */
diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h
index 34b98f2..3ae3000 100644
--- a/include/linux/dma/xilinx_dma.h
+++ b/include/linux/dma/xilinx_dma.h
@@ -41,6 +41,20 @@
 	int ext_fsync;
 };
 
+/**
+ * enum xdma_ip_type - DMA IP type.
+ *
+ * @XDMA_TYPE_AXIDMA: AXI DMA IP.
+ * @XDMA_TYPE_CDMA: AXI CDMA IP.
+ * @XDMA_TYPE_VDMA: AXI VDMA IP.
+ *
+ */
+enum xdma_ip_type {
+	XDMA_TYPE_AXIDMA = 0,
+	XDMA_TYPE_CDMA,
+	XDMA_TYPE_VDMA,
+};
+
 int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
 					struct xilinx_vdma_config *cfg);
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0174337..30de019 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -804,6 +804,9 @@
 	sg_dma_address(&sg) = buf;
 	sg_dma_len(&sg) = len;
 
+	if (!chan || !chan->device || !chan->device->device_prep_slave_sg)
+		return NULL;
+
 	return chan->device->device_prep_slave_sg(chan, &sg, 1,
 						  dir, flags, NULL);
 }
@@ -812,6 +815,9 @@
 	struct dma_chan *chan, struct scatterlist *sgl,	unsigned int sg_len,
 	enum dma_transfer_direction dir, unsigned long flags)
 {
+	if (!chan || !chan->device || !chan->device->device_prep_slave_sg)
+		return NULL;
+
 	return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
 						  dir, flags, NULL);
 }
@@ -823,6 +829,9 @@
 	enum dma_transfer_direction dir, unsigned long flags,
 	struct rio_dma_ext *rio_ext)
 {
+	if (!chan || !chan->device || !chan->device->device_prep_slave_sg)
+		return NULL;
+
 	return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
 						  dir, flags, rio_ext);
 }
@@ -833,6 +842,9 @@
 		size_t period_len, enum dma_transfer_direction dir,
 		unsigned long flags)
 {
+	if (!chan || !chan->device || !chan->device->device_prep_dma_cyclic)
+		return NULL;
+
 	return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len,
 						period_len, dir, flags);
 }
@@ -841,6 +853,9 @@
 		struct dma_chan *chan, struct dma_interleaved_template *xt,
 		unsigned long flags)
 {
+	if (!chan || !chan->device || !chan->device->device_prep_interleaved_dma)
+		return NULL;
+
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
@@ -848,7 +863,7 @@
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
 		unsigned long flags)
 {
-	if (!chan || !chan->device)
+	if (!chan || !chan->device || !chan->device->device_prep_dma_memset)
 		return NULL;
 
 	return chan->device->device_prep_dma_memset(chan, dest, value,
@@ -861,6 +876,9 @@
 		struct scatterlist *src_sg, unsigned int src_nents,
 		unsigned long flags)
 {
+	if (!chan || !chan->device || !chan->device->device_prep_dma_sg)
+		return NULL;
+
 	return chan->device->device_prep_dma_sg(chan, dst_sg, dst_nents,
 			src_sg, src_nents, flags);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 851390c..10d3d8f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2634,15 +2634,34 @@
 #endif
 extern int do_pipe_flags(int *, int);
 
+#define __kernel_read_file_id(id) \
+	id(UNKNOWN, unknown)		\
+	id(FIRMWARE, firmware)		\
+	id(MODULE, kernel-module)		\
+	id(KEXEC_IMAGE, kexec-image)		\
+	id(KEXEC_INITRAMFS, kexec-initramfs)	\
+	id(POLICY, security-policy)		\
+	id(MAX_ID, )
+
+#define __fid_enumify(ENUM, dummy) READING_ ## ENUM,
+#define __fid_stringify(dummy, str) #str,
+
 enum kernel_read_file_id {
-	READING_FIRMWARE = 1,
-	READING_MODULE,
-	READING_KEXEC_IMAGE,
-	READING_KEXEC_INITRAMFS,
-	READING_POLICY,
-	READING_MAX_ID
+	__kernel_read_file_id(__fid_enumify)
 };
 
+static const char * const kernel_read_file_str[] = {
+	__kernel_read_file_id(__fid_stringify)
+};
+
+/* Return the name for @id; an out-of-range @id yields the "unknown" name. */
+static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id)
+{
+	if ((unsigned int)id >= READING_MAX_ID)	/* also rejects negative ids */
+		return kernel_read_file_str[READING_UNKNOWN];
+	return kernel_read_file_str[id];
+}
+
 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
 extern int kernel_read_file(struct file *, void **, loff_t *, loff_t,
 			    enum kernel_read_file_id);
diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h
index b5f9a00..d4c1d12 100644
--- a/include/linux/i2c-mux.h
+++ b/include/linux/i2c-mux.h
@@ -27,22 +27,49 @@
 
 #ifdef __KERNEL__
 
-/*
- * Called to create a i2c bus on a multiplexed bus segment.
- * The mux_dev and chan_id parameters are passed to the select
- * and deselect callback functions to perform hardware-specific
- * mux control.
- */
-struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent,
-				struct device *mux_dev,
-				void *mux_priv, u32 force_nr, u32 chan_id,
-				unsigned int class,
-				int (*select) (struct i2c_adapter *,
-					       void *mux_dev, u32 chan_id),
-				int (*deselect) (struct i2c_adapter *,
-						 void *mux_dev, u32 chan_id));
+#include <linux/bitops.h>
 
-void i2c_del_mux_adapter(struct i2c_adapter *adap);
+struct i2c_mux_core {
+	struct i2c_adapter *parent;
+	struct device *dev;
+	bool mux_locked;
+
+	void *priv;
+
+	int (*select)(struct i2c_mux_core *, u32 chan_id);
+	int (*deselect)(struct i2c_mux_core *, u32 chan_id);
+
+	int num_adapters;
+	int max_adapters;
+	struct i2c_adapter *adapter[0];
+};
+
+struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent,
+				   struct device *dev, int max_adapters,
+				   int sizeof_priv, u32 flags,
+				   int (*select)(struct i2c_mux_core *, u32),
+				   int (*deselect)(struct i2c_mux_core *, u32));
+
+/* flags for i2c_mux_alloc */
+#define I2C_MUX_LOCKED BIT(0)
+
+static inline void *i2c_mux_priv(struct i2c_mux_core *muxc)
+{
+	return muxc->priv;
+}
+
+struct i2c_adapter *i2c_root_adapter(struct device *dev);
+
+/*
+ * Called to create an i2c bus on a multiplexed bus segment.
+ * The chan_id parameter is passed to the select and deselect
+ * callback functions to perform hardware-specific mux control.
+ */
+int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
+			u32 force_nr, u32 chan_id,
+			unsigned int class);
+
+void i2c_mux_del_adapters(struct i2c_mux_core *muxc);
 
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 200cf13b..96a25ae 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -524,6 +524,7 @@
 
 	/* data fields that are valid for all devices	*/
 	struct rt_mutex bus_lock;
+	struct rt_mutex mux_lock;
 
 	int timeout;			/* in jiffies */
 	int retries;
@@ -538,6 +539,10 @@
 
 	struct i2c_bus_recovery_info *bus_recovery_info;
 	const struct i2c_adapter_quirks *quirks;
+
+	void (*lock_bus)(struct i2c_adapter *, unsigned int flags);
+	int (*trylock_bus)(struct i2c_adapter *, unsigned int flags);
+	void (*unlock_bus)(struct i2c_adapter *, unsigned int flags);
 };
 #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev)
 
@@ -567,8 +572,44 @@
 int i2c_for_each_dev(void *data, int (*fn)(struct device *, void *));
 
 /* Adapter locking functions, exported for shared pin cases */
-void i2c_lock_adapter(struct i2c_adapter *);
-void i2c_unlock_adapter(struct i2c_adapter *);
+#define I2C_LOCK_ROOT_ADAPTER BIT(0)
+#define I2C_LOCK_SEGMENT      BIT(1)
+
+/**
+ * i2c_lock_bus - Get exclusive access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	locks only this branch in the adapter tree
+ */
+static inline void
+i2c_lock_bus(struct i2c_adapter *adapter, unsigned int flags)
+{
+	adapter->lock_bus(adapter, flags);
+}
+
+/**
+ * i2c_unlock_bus - Release exclusive access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	unlocks only this branch in the adapter tree
+ */
+static inline void
+i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags)
+{
+	adapter->unlock_bus(adapter, flags);
+}
+
+static inline void
+i2c_lock_adapter(struct i2c_adapter *adapter)
+{
+	i2c_lock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
+}
+
+static inline void
+i2c_unlock_adapter(struct i2c_adapter *adapter)
+{
+	i2c_unlock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
+}
 
 /*flags for the client struct: */
 #define I2C_CLIENT_PEC		0x04	/* Use Packet Error Checking */
@@ -654,6 +695,11 @@
 	return adap->nr;
 }
 
+static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg)
+{
+	return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0);
+}
+
 /**
  * module_i2c_driver() - Helper macro for registering a modular I2C driver
  * @__i2c_driver: i2c_driver struct
diff --git a/include/linux/ima.h b/include/linux/ima.h
index e6516cb..0eb7c2e 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -21,6 +21,7 @@
 extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
 extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
 			      enum kernel_read_file_id id);
+extern void ima_post_path_mknod(struct dentry *dentry);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -54,6 +55,11 @@
 	return 0;
 }
 
+static inline void ima_post_path_mknod(struct dentry *dentry)
+{
+	return;
+}
+
 #endif /* CONFIG_IMA */
 
 #ifdef CONFIG_IMA_APPRAISE
diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h
index 11d7e84..defcc46 100644
--- a/include/linux/io-64-nonatomic-hi-lo.h
+++ b/include/linux/io-64-nonatomic-hi-lo.h
@@ -21,6 +21,23 @@
 	writel(val, addr);
 }
 
+static inline __u64 hi_lo_readq_relaxed(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	high = readl_relaxed(p + 1);
+	low = readl_relaxed(p);
+
+	return low + ((u64)high << 32);
+}
+
+static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr)
+{
+	writel_relaxed(val >> 32, addr + 4);
+	writel_relaxed(val, addr);
+}
+
 #ifndef readq
 #define readq hi_lo_readq
 #endif
@@ -29,4 +46,12 @@
 #define writeq hi_lo_writeq
 #endif
 
+#ifndef readq_relaxed
+#define readq_relaxed hi_lo_readq_relaxed
+#endif
+
+#ifndef writeq_relaxed
+#define writeq_relaxed hi_lo_writeq_relaxed
+#endif
+
 #endif	/* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */
diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h
index 1a4315f..084461a 100644
--- a/include/linux/io-64-nonatomic-lo-hi.h
+++ b/include/linux/io-64-nonatomic-lo-hi.h
@@ -21,6 +21,23 @@
 	writel(val >> 32, addr + 4);
 }
 
+static inline __u64 lo_hi_readq_relaxed(const volatile void __iomem *addr)
+{
+	const volatile u32 __iomem *p = addr;
+	u32 low, high;
+
+	low = readl_relaxed(p);
+	high = readl_relaxed(p + 1);
+
+	return low + ((u64)high << 32);
+}
+
+static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr)
+{
+	writel_relaxed(val, addr);
+	writel_relaxed(val >> 32, addr + 4);
+}
+
 #ifndef readq
 #define readq lo_hi_readq
 #endif
@@ -29,4 +46,12 @@
 #define writeq lo_hi_writeq
 #endif
 
+#ifndef readq_relaxed
+#define readq_relaxed lo_hi_readq_relaxed
+#endif
+
+#ifndef writeq_relaxed
+#define writeq_relaxed lo_hi_writeq_relaxed
+#endif
+
 #endif	/* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ef7a6ec..664683a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -30,6 +30,7 @@
 #define IOMMU_WRITE	(1 << 1)
 #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC	(1 << 3)
+#define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
 
 struct iommu_ops;
 struct iommu_group;
@@ -78,6 +79,7 @@
 struct iommu_domain {
 	unsigned type;
 	const struct iommu_ops *ops;
+	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
 	iommu_fault_handler_t handler;
 	void *handler_token;
 	struct iommu_domain_geometry geometry;
@@ -155,8 +157,7 @@
  * @domain_set_windows: Set the number of windows for a domain
  * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
- * @pgsize_bitmap: bitmap of supported page sizes
- * @priv: per-instance data private to the iommu driver
+ * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
 struct iommu_ops {
 	bool (*capable)(enum iommu_cap);
@@ -198,7 +199,6 @@
 	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
 
 	unsigned long pgsize_bitmap;
-	void *priv;
 };
 
 #define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 0b65543..6230064 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -26,6 +26,9 @@
 
 /*
  * IO resources have these defined flags.
+ *
+ * PCI devices expose these flags to userspace in the "resource" sysfs file,
+ * so don't move them.
  */
 #define IORESOURCE_BITS		0x000000ff	/* Bus-specific bits */
 
@@ -110,6 +113,7 @@
 
 /* PCI control bits.  Shares IORESOURCE_BITS with above PCI ROM.  */
 #define IORESOURCE_PCI_FIXED		(1<<4)	/* Do not move resource */
+#define IORESOURCE_PCI_EA_BEI		(1<<5)	/* BAR Equivalent Indicator */
 
 /*
  * I/O Resource Descriptors
diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
index 1551b5b..f0f5d26 100644
--- a/include/linux/irqbypass.h
+++ b/include/linux/irqbypass.h
@@ -34,7 +34,7 @@
 /**
  * struct irq_bypass_producer - IRQ bypass producer definition
  * @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer
+ * @token: opaque token to match between producer and consumer (non-NULL)
  * @irq: Linux IRQ number for the producer device
  * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
  * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
@@ -60,7 +60,7 @@
 /**
  * struct irq_bypass_consumer - IRQ bypass consumer definition
  * @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer
+ * @token: opaque token to match between producer and consumer (non-NULL)
  * @add_producer: Connect the IRQ consumer to an IRQ producer
  * @del_producer: Disconnect the IRQ consumer from an IRQ producer
  * @stop: Perform any quiesce operations necessary prior to add/del (optional)
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
new file mode 100644
index 0000000..c647b05
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -0,0 +1,34 @@
+/*
+ * include/linux/irqchip/arm-gic-common.h
+ *
+ * Copyright (C) 2016 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H
+#define __LINUX_IRQCHIP_ARM_GIC_COMMON_H
+
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+enum gic_type {
+	GIC_V2,
+	GIC_V3,
+};
+
+struct gic_kvm_info {
+	/* GIC type */
+	enum gic_type	type;
+	/* Virtual CPU interface */
+	struct resource vcpu;
+	/* Interrupt number */
+	unsigned int	maint_irq;
+	/* Virtual control interface */
+	struct resource vctrl;
+};
+
+const struct gic_kvm_info *gic_get_kvm_info(void);
+
+#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h
index 80f89e4..81f930b 100644
--- a/include/linux/irqchip/mips-gic.h
+++ b/include/linux/irqchip/mips-gic.h
@@ -103,6 +103,7 @@
 #define GIC_VPE_SWINT0_MAP_OFS		0x0054
 #define GIC_VPE_SWINT1_MAP_OFS		0x0058
 #define GIC_VPE_OTHER_ADDR_OFS		0x0080
+#define GIC_VP_IDENT_OFS		0x0088
 #define GIC_VPE_WD_CONFIG0_OFS		0x0090
 #define GIC_VPE_WD_COUNT0_OFS		0x0094
 #define GIC_VPE_WD_INITIAL0_OFS		0x0098
@@ -211,6 +212,10 @@
 #define GIC_VPE_SMASK_FDC_SHF		6
 #define GIC_VPE_SMASK_FDC_MSK		(MSK(1) << GIC_VPE_SMASK_FDC_SHF)
 
+/* GIC_VP_IDENT fields */
+#define GIC_VP_IDENT_VCNUM_SHF		0
+#define GIC_VP_IDENT_VCNUM_MSK		(MSK(6) << GIC_VP_IDENT_VCNUM_SHF)
+
 /* GIC nomenclature for Core Interrupt Pins. */
 #define GIC_CPU_INT0		0 /* Core Interrupt 2 */
 #define GIC_CPU_INT1		1 /* .		      */
@@ -278,4 +283,16 @@
 
 #endif /* CONFIG_MIPS_GIC */
 
+/**
+ * gic_read_local_vp_id() - read the local VP's VCNUM
+ *
+ * Read the VCNUM of the local VP from the GIC_VP_IDENT register and
+ * return it to the caller. This ID should be used to refer to the VP
+ * via the GIC's VP-other region, or when calculating an offset to a
+ * bit representing the VP in interrupt masks.
+ *
+ * Return: The VCNUM value for the local VP.
+ */
+extern unsigned gic_read_local_vp_id(void);
+
 #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 7463355..eaee981 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -45,7 +45,6 @@
 	size_t		datalen;	/* Raw datalen */
 	size_t		quotalen;	/* Quota length for proposed payload */
 	time_t		expiry;		/* Expiry time of key */
-	bool		trusted;	/* True if key is trusted */
 };
 
 typedef int (*request_key_actor_t)(struct key_construction *key,
diff --git a/include/linux/key.h b/include/linux/key.h
index 5f5b112..7229147 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -173,11 +173,9 @@
 #define KEY_FLAG_NEGATIVE	5	/* set if key is negative */
 #define KEY_FLAG_ROOT_CAN_CLEAR	6	/* set if key can be cleared by root without permission */
 #define KEY_FLAG_INVALIDATED	7	/* set if key has been invalidated */
-#define KEY_FLAG_TRUSTED	8	/* set if key is trusted */
-#define KEY_FLAG_TRUSTED_ONLY	9	/* set if keyring only accepts links to trusted keys */
-#define KEY_FLAG_BUILTIN	10	/* set if key is builtin */
-#define KEY_FLAG_ROOT_CAN_INVAL	11	/* set if key can be invalidated by root without permission */
-#define KEY_FLAG_KEEP		12	/* set if key should not be removed */
+#define KEY_FLAG_BUILTIN	8	/* set if key is built in to the kernel */
+#define KEY_FLAG_ROOT_CAN_INVAL	9	/* set if key can be invalidated by root without permission */
+#define KEY_FLAG_KEEP		10	/* set if key should not be removed */
 
 	/* the key type and key description string
 	 * - the desc is used to match a key against search criteria
@@ -205,6 +203,20 @@
 		};
 		int reject_error;
 	};
+
+	/* This is set on a keyring to restrict the addition of a link to a key
+	 * to it.  If this method isn't provided then it is assumed that the
+	 * keyring is open to any addition.  It is ignored for non-keyring
+	 * keys.
+	 *
+	 * This is intended for use with rings of trusted keys whereby addition
+	 * to the keyring needs to be controlled.  KEY_ALLOC_BYPASS_RESTRICTION
+	 * overrides this, allowing the kernel to add extra keys without
+	 * restriction.
+	 */
+	int (*restrict_link)(struct key *keyring,
+			     const struct key_type *type,
+			     const union key_payload *payload);
 };
 
 extern struct key *key_alloc(struct key_type *type,
@@ -212,14 +224,17 @@
 			     kuid_t uid, kgid_t gid,
 			     const struct cred *cred,
 			     key_perm_t perm,
-			     unsigned long flags);
+			     unsigned long flags,
+			     int (*restrict_link)(struct key *,
+						  const struct key_type *,
+						  const union key_payload *));
 
 
-#define KEY_ALLOC_IN_QUOTA	0x0000	/* add to quota, reject if would overrun */
-#define KEY_ALLOC_QUOTA_OVERRUN	0x0001	/* add to quota, permit even if overrun */
-#define KEY_ALLOC_NOT_IN_QUOTA	0x0002	/* not in quota */
-#define KEY_ALLOC_TRUSTED	0x0004	/* Key should be flagged as trusted */
-#define KEY_ALLOC_BUILT_IN	0x0008	/* Key is built into kernel */
+#define KEY_ALLOC_IN_QUOTA		0x0000	/* add to quota, reject if would overrun */
+#define KEY_ALLOC_QUOTA_OVERRUN		0x0001	/* add to quota, permit even if overrun */
+#define KEY_ALLOC_NOT_IN_QUOTA		0x0002	/* not in quota */
+#define KEY_ALLOC_BUILT_IN		0x0004	/* Key is built into kernel */
+#define KEY_ALLOC_BYPASS_RESTRICTION	0x0008	/* Override the check on restricted keyrings */
 
 extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
@@ -288,8 +303,15 @@
 				 const struct cred *cred,
 				 key_perm_t perm,
 				 unsigned long flags,
+				 int (*restrict_link)(struct key *,
+						      const struct key_type *,
+						      const union key_payload *),
 				 struct key *dest);
 
+extern int restrict_link_reject(struct key *keyring,
+				const struct key_type *type,
+				const union key_payload *payload);
+
 extern int keyring_clear(struct key *keyring);
 
 extern key_ref_t keyring_search(key_ref_t keyring,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5276fe0..b1fa8f1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -35,6 +35,10 @@
 
 #include <asm/kvm_host.h>
 
+#ifndef KVM_MAX_VCPU_ID
+#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
+#endif
+
 /*
  * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
  * in kvm, other bits are visible for userspace which are defined in
@@ -225,6 +229,7 @@
 	sigset_t sigset;
 	struct kvm_vcpu_stat stat;
 	unsigned int halt_poll_ns;
+	bool valid_wakeup;
 
 #ifdef CONFIG_HAS_IOMEM
 	int mmio_needed;
@@ -447,12 +452,13 @@
 
 static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 {
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu = NULL;
 	int i;
 
-	if (id < 0 || id >= KVM_MAX_VCPUS)
+	if (id < 0)
 		return NULL;
-	vcpu = kvm_get_vcpu(kvm, id);
+	if (id < KVM_MAX_VCPUS)
+		vcpu = kvm_get_vcpu(kvm, id);
 	if (vcpu && vcpu->vcpu_id == id)
 		return vcpu;
 	kvm_for_each_vcpu(i, vcpu, kvm)
@@ -651,6 +657,7 @@
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
+void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
@@ -1091,6 +1098,11 @@
 
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
+	/*
+	 * Ensure the rest of the request is published to kvm_check_request's
+	 * caller.  Paired with the smp_mb__after_atomic in kvm_check_request.
+	 */
+	smp_wmb();
 	set_bit(req, &vcpu->requests);
 }
 
@@ -1098,6 +1110,12 @@
 {
 	if (test_bit(req, &vcpu->requests)) {
 		clear_bit(req, &vcpu->requests);
+
+		/*
+		 * Ensure the rest of the request is visible to kvm_check_request's
+		 * caller.  Paired with the smp_wmb in kvm_make_request.
+		 */
+		smp_mb__after_atomic();
 		return true;
 	} else {
 		return false;
@@ -1169,6 +1187,7 @@
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
 
 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+bool kvm_arch_has_irq_bypass(void);
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
 			   struct irq_bypass_producer *);
 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
@@ -1179,4 +1198,18 @@
 				  uint32_t guest_irq, bool set);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 
+#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
+/* If we wake up during the poll time, was it a successful poll? */
+static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
+{
+	return vcpu->valid_wakeup;
+}
+
+#else
+static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
+{
+	return true;
+}
+#endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
+
 #endif
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 512fd00..7ae3976 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1805,7 +1805,6 @@
 	struct list_head tun_dev_attach_queue;
 	struct list_head tun_dev_attach;
 	struct list_head tun_dev_open;
-	struct list_head skb_owned_by;
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	struct list_head xfrm_policy_alloc_security;
@@ -1894,5 +1893,10 @@
 #else
 static inline void __init yama_add_hooks(void) { }
 #endif
+#ifdef CONFIG_SECURITY_LOADPIN
+void __init loadpin_add_hooks(void);
+#else /* !CONFIG_SECURITY_LOADPIN: empty stub, nothing to register */
+static inline void __init loadpin_add_hooks(void) { }
+#endif /* CONFIG_SECURITY_LOADPIN */
 
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index 238c8db..c8e0164 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -95,6 +95,7 @@
 #define IMX6Q_GPR0_DMAREQ_MUX_SEL0_IOMUX	BIT(0)
 
 #define IMX6Q_GPR1_PCIE_REQ_MASK		(0x3 << 30)
+#define IMX6Q_GPR1_PCIE_SW_RST			BIT(29)
 #define IMX6Q_GPR1_PCIE_EXIT_L1			BIT(28)
 #define IMX6Q_GPR1_PCIE_RDY_L23			BIT(27)
 #define IMX6Q_GPR1_PCIE_ENTER_L1		BIT(26)
@@ -447,5 +448,11 @@
 #define IMX6UL_GPR1_ENET2_CLK_OUTPUT		(0x1 << 18)
 #define IMX6UL_GPR1_ENET_CLK_DIR		(0x3 << 17)
 #define IMX6UL_GPR1_ENET_CLK_OUTPUT		(0x3 << 17)
+#define IMX6UL_GPR1_SAI1_MCLK_DIR		(0x1 << 19)
+#define IMX6UL_GPR1_SAI2_MCLK_DIR		(0x1 << 20)
+#define IMX6UL_GPR1_SAI3_MCLK_DIR		(0x1 << 21)
+#define IMX6UL_GPR1_SAI_MCLK_MASK		(0x7 << 19)
+#define MCLK_DIR(x) ((x) == 1 ? IMX6UL_GPR1_SAI1_MCLK_DIR : (x) == 2 ? \
+		     IMX6UL_GPR1_SAI2_MCLK_DIR : IMX6UL_GPR1_SAI3_MCLK_DIR) /* 1: SAI1, 2: SAI2, else SAI3 */
 
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 01c0a55..3786473 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -47,10 +47,6 @@
 extern const __be32 *of_get_address(struct device_node *dev, int index,
 			   u64 *size, unsigned int *flags);
 
-extern int pci_register_io_range(phys_addr_t addr, resource_size_t size);
-extern unsigned long pci_address_to_pio(phys_addr_t addr);
-extern phys_addr_t pci_pio_to_address(unsigned long pio);
-
 extern int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 			struct device_node *node);
 extern struct of_pci_range *of_pci_range_parser_one(
@@ -86,11 +82,6 @@
 	return NULL;
 }
 
-static inline phys_addr_t pci_pio_to_address(unsigned long pio)
-{
-	return 0;
-}
-
 static inline int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 			struct device_node *node)
 {
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index ffbe470..bd02b44 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -12,7 +12,7 @@
 			     size_t *size);
 
 extern void of_iommu_init(void);
-extern struct iommu_ops *of_iommu_configure(struct device *dev,
+extern const struct iommu_ops *of_iommu_configure(struct device *dev,
 					struct device_node *master_np);
 
 #else
@@ -25,7 +25,7 @@
 }
 
 static inline void of_iommu_init(void) { }
-static inline struct iommu_ops *of_iommu_configure(struct device *dev,
+static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 					 struct device_node *master_np)
 {
 	return NULL;
@@ -33,8 +33,8 @@
 
 #endif	/* CONFIG_OF_IOMMU */
 
-void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops);
-struct iommu_ops *of_iommu_get_ops(struct device_node *np);
+void of_iommu_set_ops(struct device_node *np, const struct iommu_ops *ops);
+const struct iommu_ops *of_iommu_get_ops(struct device_node *np);
 
 extern struct of_device_id __iommu_of_table;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 932ec74..b67e4df 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -166,8 +166,6 @@
 	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2),
 	/* Flag for quirk use to store if quirk-specific ACS is enabled */
 	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
-	/* Flag to indicate the device uses dma_alias_devfn */
-	PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4),
 	/* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */
 	PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5),
 	/* Do not use bus resets for device */
@@ -273,7 +271,7 @@
 	u8		rom_base_reg;	/* which config register controls the ROM */
 	u8		pin;		/* which interrupt pin this device uses */
 	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
-	u8		dma_alias_devfn;/* devfn of DMA alias, if any */
+	unsigned long	*dma_alias_mask;/* mask of enabled devfn aliases */
 
 	struct pci_driver *driver;	/* which driver has allocated this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
@@ -1165,6 +1163,9 @@
 			void *alignf_data);
 
 
+int pci_register_io_range(phys_addr_t addr, resource_size_t size);
+unsigned long pci_address_to_pio(phys_addr_t addr);
+phys_addr_t pci_pio_to_address(unsigned long pio);
 int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
 
 static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
@@ -1481,6 +1482,8 @@
 { return -EIO; }
 static inline void pci_release_regions(struct pci_dev *dev) { }
 
+static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
+
 static inline void pci_block_cfg_access(struct pci_dev *dev) { }
 static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev)
 { return 0; }
@@ -1664,7 +1667,7 @@
 #ifdef CONFIG_PCI_QUIRKS
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
 int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
-void pci_dev_specific_enable_acs(struct pci_dev *dev);
+int pci_dev_specific_enable_acs(struct pci_dev *dev);
 #else
 static inline void pci_fixup_device(enum pci_fixup_pass pass,
 				    struct pci_dev *dev) { }
@@ -1673,7 +1676,10 @@
 {
 	return -ENOTTY;
 }
-static inline void pci_dev_specific_enable_acs(struct pci_dev *dev) { }
+static inline int pci_dev_specific_enable_acs(struct pci_dev *dev)
+{
+	return -ENOTTY;
+}
 #endif
 
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
@@ -1989,6 +1995,8 @@
 }
 #endif
 
+void pci_add_dma_alias(struct pci_dev *dev, u8 devfn);
+bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2);
 int pci_for_each_dma_alias(struct pci_dev *pdev,
 			   int (*fn)(struct pci_dev *pdev,
 				     u16 alias, void *data), void *data);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 247da8c..c58752f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2604,6 +2604,24 @@
 #define PCI_DEVICE_ID_INTEL_82441	0x1237
 #define PCI_DEVICE_ID_INTEL_82380FB	0x124b
 #define PCI_DEVICE_ID_INTEL_82439	0x1250
+#define PCI_DEVICE_ID_INTEL_LIGHT_RIDGE             0x1513 /* Tbt 1 Gen 1 */
+#define PCI_DEVICE_ID_INTEL_EAGLE_RIDGE             0x151a
+#define PCI_DEVICE_ID_INTEL_LIGHT_PEAK              0x151b
+#define PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C         0x1547 /* Tbt 1 Gen 2 */
+#define PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_2C         0x1548
+#define PCI_DEVICE_ID_INTEL_PORT_RIDGE              0x1549
+#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_2C_NHI    0x1566 /* Tbt 1 Gen 3 */
+#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_2C_BRIDGE 0x1567
+#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_4C_NHI    0x1568
+#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_4C_BRIDGE 0x1569
+#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI     0x156a /* Thunderbolt 2 */
+#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE  0x156b
+#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI     0x156c
+#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE  0x156d
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_NHI     0x1575 /* Thunderbolt 3 */
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_BRIDGE  0x1576
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI     0x1577
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE  0x1578
 #define PCI_DEVICE_ID_INTEL_80960_RP	0x1960
 #define PCI_DEVICE_ID_INTEL_82840_HB	0x1a21
 #define PCI_DEVICE_ID_INTEL_82845_HB	0x1a30
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index 4f1089f..afcd130 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -21,6 +21,8 @@
 #define PCIE_PORT_SERVICE_HP		(1 << PCIE_PORT_SERVICE_HP_SHIFT)
 #define PCIE_PORT_SERVICE_VC_SHIFT	3	/* Virtual Channel */
 #define PCIE_PORT_SERVICE_VC		(1 << PCIE_PORT_SERVICE_VC_SHIFT)
+#define PCIE_PORT_SERVICE_DPC_SHIFT	4	/* Downstream Port Containment */
+#define PCIE_PORT_SERVICE_DPC		(1 << PCIE_PORT_SERVICE_DPC_SHIFT)
 
 struct pcie_device {
 	int		irq;	    /* Service IRQ/MSI/MSI-X Vector */
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 9ba59fc..a42e57d 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -144,6 +144,12 @@
 extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 				struct device *dev, void *driver_data);
 extern void pinctrl_unregister(struct pinctrl_dev *pctldev);
+extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev,
+				struct pinctrl_desc *pctldesc,
+				void *driver_data);
+extern void devm_pinctrl_unregister(struct device *dev,
+				struct pinctrl_dev *pctldev);
+
 extern bool pin_is_valid(struct pinctrl_dev *pctldev, int pin);
 extern void pinctrl_add_gpio_range(struct pinctrl_dev *pctldev,
 				struct pinctrl_gpio_range *range);
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 03b6095..d15d8ba 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -21,15 +21,15 @@
  * @dma_dev:	required DMA master device
  * @src_id:	src request line
  * @dst_id:	dst request line
- * @src_master: src master for transfers on allocated channel.
- * @dst_master: dest master for transfers on allocated channel.
+ * @m_master:	memory master for transfers on allocated channel
+ * @p_master:	peripheral master for transfers on allocated channel
  */
 struct dw_dma_slave {
 	struct device		*dma_dev;
 	u8			src_id;
 	u8			dst_id;
-	u8			src_master;
-	u8			dst_master;
+	u8			m_master;
+	u8			p_master;
 };
 
 /**
@@ -43,7 +43,7 @@
  * @block_size: Maximum block size supported by the controller
  * @nr_masters: Number of AHB masters supported by the controller
  * @data_width: Maximum data width supported by hardware per AHB master
- *		(0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
+ *		(in bytes, power of 2)
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
@@ -55,7 +55,7 @@
 #define CHAN_PRIORITY_ASCENDING		0	/* chan0 highest */
 #define CHAN_PRIORITY_DESCENDING	1	/* chan7 highest */
 	unsigned char	chan_priority;
-	unsigned short	block_size;
+	unsigned int	block_size;
 	unsigned char	nr_masters;
 	unsigned char	data_width[DW_DMA_MAX_NR_MASTERS];
 };
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3fbe814..639be26 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -28,6 +28,21 @@
 	sigset_t signal;
 };
 
+#ifndef HAVE_ARCH_COPY_SIGINFO
+
+#include <linux/string.h>
+
+static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
+{
+	if (from->si_code < 0)
+		memcpy(to, from, sizeof(*to));
+	else
+		/* _sigchld is currently the largest known union member */
+		memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
+}
+
+#endif
+
 /*
  * Define some primitives to manipulate sigset_t.
  */
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index dabe643..5ce9538 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -3,6 +3,8 @@
 
 #include <linux/types.h>
 
+struct file;
+
 /* Descriptions of the types of units to
  * print in */
 enum string_size_units {
@@ -68,4 +70,8 @@
 	return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only);
 }
 
+char *kstrdup_quotable(const char *src, gfp_t gfp);
+char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp);
+char *kstrdup_quotable_file(struct file *file, gfp_t gfp);
+
 #endif
diff --git a/include/linux/verification.h b/include/linux/verification.h
new file mode 100644
index 0000000..a10549a
--- /dev/null
+++ b/include/linux/verification.h
@@ -0,0 +1,49 @@
+/* Signature verification
+ *
+ * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_VERIFICATION_H
+#define _LINUX_VERIFICATION_H
+
+/*
+ * The use to which an asymmetric key is being put.
+ */
+enum key_being_used_for {
+	VERIFYING_MODULE_SIGNATURE,
+	VERIFYING_FIRMWARE_SIGNATURE,
+	VERIFYING_KEXEC_PE_SIGNATURE,
+	VERIFYING_KEY_SIGNATURE,
+	VERIFYING_KEY_SELF_SIGNATURE,
+	VERIFYING_UNSPECIFIED_SIGNATURE,
+	NR__KEY_BEING_USED_FOR
+};
+extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR];
+
+#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
+
+struct key;
+
+extern int verify_pkcs7_signature(const void *data, size_t len,
+				  const void *raw_pkcs7, size_t pkcs7_len,
+				  struct key *trusted_keys,
+				  enum key_being_used_for usage,
+				  int (*view_content)(void *ctx,
+						      const void *data, size_t len,
+						      size_t asn1hdrlen),
+				  void *ctx);
+
+#ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION
+extern int verify_pefile_signature(const void *pebuf, unsigned pelen,
+				   struct key *trusted_keys,
+				   enum key_being_used_for usage);
+#endif
+
+#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
+#endif /* _LINUX_VERIFICATION_H */
diff --git a/include/linux/verify_pefile.h b/include/linux/verify_pefile.h
deleted file mode 100644
index da2049b..0000000
--- a/include/linux/verify_pefile.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Signed PE file verification
- *
- * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef _LINUX_VERIFY_PEFILE_H
-#define _LINUX_VERIFY_PEFILE_H
-
-#include <crypto/public_key.h>
-
-extern int verify_pefile_signature(const void *pebuf, unsigned pelen,
-				   struct key *trusted_keyring,
-				   enum key_being_used_for usage,
-				   bool *_trusted);
-
-#endif /* _LINUX_VERIFY_PEFILE_H */
diff --git a/include/soc/nps/common.h b/include/soc/nps/common.h
new file mode 100644
index 0000000..9b1d43d
--- /dev/null
+++ b/include/soc/nps/common.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef SOC_NPS_COMMON_H
+#define SOC_NPS_COMMON_H
+
+#ifdef CONFIG_SMP
+#define NPS_IPI_IRQ					5
+#endif
+
+#define NPS_HOST_REG_BASE			0xF6000000
+
+#define NPS_MSU_BLKID				0x018
+
+#define CTOP_INST_RSPI_GIC_0_R12		0x3C56117E
+#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST	0x5B60
+#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM	0x00010422
+
+#ifndef __ASSEMBLY__
+
+/* In order to increase compilation test coverage */
+#ifdef CONFIG_ARC
+static inline void nps_ack_gic(void)
+{
+	__asm__ __volatile__ (
+	"       .word %0\n"
+	:
+	: "i"(CTOP_INST_RSPI_GIC_0_R12)
+	: "memory");
+}
+#else
+static inline void nps_ack_gic(void) { }
+#define write_aux_reg(r, v)
+#define read_aux_reg(r) 0
+#endif
+
+/* CPU global ID */
+struct global_id {
+	union {
+		struct {
+#ifdef CONFIG_EZNPS_MTM_EXT
+			u32 __reserved:20, cluster:4, core:4, thread:4;
+#else
+			u32 __reserved:24, cluster:4, core:4;
+#endif
+		};
+		u32 value;
+	};
+};
+
+/*
+ * Convert logical to physical CPU IDs
+ *
+ * The conversion swaps bits 1 and 2 of the cluster id (out of 4 bits).
+ * This makes each quad of logical cluster ids physically adjacent,
+ * unlike the ids the clusters physically came with.
+ * The table below is the 4x4 mesh of core clusters as laid out on chip.
+ * Cluster ids are in format: logical (physical)
+ *
+ *    -----------------   ------------------
+ * 3 |  5 (3)   7 (7)  | | 13 (11)   15 (15)|
+ *
+ * 2 |  4 (2)   6 (6)  | | 12 (10)   14 (14)|
+ *    -----------------   ------------------
+ * 1 |  1 (1)   3 (5)  | |  9  (9)   11 (13)|
+ *
+ * 0 |  0 (0)   2 (4)  | |  8  (8)   10 (12)|
+ *    -----------------   ------------------
+ *       0       1            2        3
+ */
+static inline int nps_cluster_logic_to_phys(int cluster)
+{
+#ifdef __arc__
+	 __asm__ __volatile__(
+	"       mov r3,%0\n"
+	"       .short %1\n"
+	"       .word %2\n"
+	"       mov %0,r3\n"
+	: "+r"(cluster)
+	: "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST),
+	  "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM)
+	: "r3");
+#endif
+
+	return cluster;
+}
+
+#define NPS_CPU_TO_CLUSTER_NUM(cpu) \
+	({ struct global_id gid; gid.value = cpu; \
+		nps_cluster_logic_to_phys(gid.cluster); })
+
+struct nps_host_reg_address {
+	union {
+		struct {
+			u32 base:8, cl_x:4, cl_y:4,
+			blkid:6, reg:8, __reserved:2;
+		};
+		u32 value;
+	};
+};
+
+struct nps_host_reg_address_non_cl {
+	union {
+		struct {
+			u32 base:7, blkid:11, reg:12, __reserved:2;
+		};
+		u32 value;
+	};
+};
+
+static inline void *nps_host_reg_non_cl(u32 blkid, u32 reg)
+{
+	struct nps_host_reg_address_non_cl reg_address;
+
+	reg_address.value = NPS_HOST_REG_BASE;
+	reg_address.blkid = blkid;
+	reg_address.reg = reg;
+
+	return (void *)reg_address.value;
+}
+
+static inline void *nps_host_reg(u32 cpu, u32 blkid, u32 reg)
+{
+	struct nps_host_reg_address reg_address;
+	u32 cl = NPS_CPU_TO_CLUSTER_NUM(cpu);
+
+	reg_address.value = NPS_HOST_REG_BASE;
+	reg_address.cl_x  = (cl >> 2) & 0x3;
+	reg_address.cl_y  = cl & 0x3;
+	reg_address.blkid = blkid;
+	reg_address.reg   = reg;
+
+	return (void *)reg_address.value;
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* SOC_NPS_COMMON_H */
diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index f86ef5e..67be244 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -51,6 +51,16 @@
 	void *filter_data);
 struct dma_chan *snd_dmaengine_pcm_get_chan(struct snd_pcm_substream *substream);
 
+/*
+ * The DAI supports packed transfers, eg 2 16-bit samples in a 32-bit word.
+ * If this flag is set the dmaengine driver won't put any restriction on
+ * the supported sample formats and set the DMA transfer size to undefined.
+ * The DAI driver is responsible for disabling any unsupported formats in its
+ * configuration and catch corner cases that are not already handled in
+ * the ALSA core.
+ */
+#define SND_DMAENGINE_PCM_DAI_FLAG_PACK BIT(0)
+
 /**
  * struct snd_dmaengine_dai_dma_data - DAI DMA configuration data
  * @addr: Address of the DAI data source or destination register.
@@ -63,6 +73,7 @@
  * requesting the DMA channel.
  * @chan_name: Custom channel name to use when requesting DMA channel.
  * @fifo_size: FIFO size of the DAI controller in bytes
+ * @flags: PCM_DAI flags, only SND_DMAENGINE_PCM_DAI_FLAG_PACK for now
  */
 struct snd_dmaengine_dai_dma_data {
 	dma_addr_t addr;
@@ -72,6 +83,7 @@
 	void *filter_data;
 	const char *chan_name;
 	unsigned int fifo_size;
+	unsigned int flags;
 };
 
 void snd_dmaengine_pcm_set_config_from_dai_data(
diff --git a/include/sound/hda_chmap.h b/include/sound/hda_chmap.h
index e20d219..babd445 100644
--- a/include/sound/hda_chmap.h
+++ b/include/sound/hda_chmap.h
@@ -36,6 +36,8 @@
 	int (*chmap_validate)(struct hdac_chmap *hchmap, int ca,
 			int channels, unsigned char *chmap);
 
+	int (*get_spk_alloc)(struct hdac_device *hdac, int pcm_idx);
+
 	void (*get_chmap)(struct hdac_device *hdac, int pcm_idx,
 					unsigned char *chmap);
 	void (*set_chmap)(struct hdac_device *hdac, int pcm_idx,
diff --git a/include/sound/hda_i915.h b/include/sound/hda_i915.h
index f5842bc..796cabf 100644
--- a/include/sound/hda_i915.h
+++ b/include/sound/hda_i915.h
@@ -10,8 +10,8 @@
 int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable);
 int snd_hdac_display_power(struct hdac_bus *bus, bool enable);
 void snd_hdac_i915_set_bclk(struct hdac_bus *bus);
-int snd_hdac_sync_audio_rate(struct hdac_bus *bus, hda_nid_t nid, int rate);
-int snd_hdac_acomp_get_eld(struct hdac_bus *bus, hda_nid_t nid,
+int snd_hdac_sync_audio_rate(struct hdac_device *codec, hda_nid_t nid, int rate);
+int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid,
 			   bool *audio_enabled, char *buffer, int max_bytes);
 int snd_hdac_i915_init(struct hdac_bus *bus);
 int snd_hdac_i915_exit(struct hdac_bus *bus);
@@ -28,12 +28,12 @@
 static inline void snd_hdac_i915_set_bclk(struct hdac_bus *bus)
 {
 }
-static inline int snd_hdac_sync_audio_rate(struct hdac_bus *bus, hda_nid_t nid,
-					   int rate)
+static inline int snd_hdac_sync_audio_rate(struct hdac_device *codec,
+					   hda_nid_t nid, int rate)
 {
 	return 0;
 }
-static inline int snd_hdac_acomp_get_eld(struct hdac_bus *bus, hda_nid_t nid,
+static inline int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid,
 					 bool *audio_enabled, char *buffer,
 					 int max_bytes)
 {
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index 07fa592..b9593b2 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -14,6 +14,8 @@
  * @gtscap: gts capabilities pointer
  * @drsmcap: dma resume capabilities pointer
  * @hlink_list: link list of HDA links
+ * @lock: lock for link mgmt
+ * @cmd_dma_state: state of cmd DMAs: CORB and RIRB
  */
 struct hdac_ext_bus {
 	struct hdac_bus bus;
@@ -27,6 +29,9 @@
 	void __iomem *drsmcap;
 
 	struct list_head hlink_list;
+
+	struct mutex lock;
+	bool cmd_dma_state;
 };
 
 int snd_hdac_ext_bus_init(struct hdac_ext_bus *sbus, struct device *dev,
@@ -142,6 +147,9 @@
 	void __iomem *ml_addr; /* link output stream reg pointer */
 	u32 lcaps;   /* link capablities */
 	u16 lsdiid;  /* link sdi identifier */
+
+	int ref_count;
+
 	struct list_head list;
 };
 
@@ -154,6 +162,11 @@
 void snd_hdac_ext_link_clear_stream_id(struct hdac_ext_link *link,
 				 int stream);
 
+int snd_hdac_ext_bus_link_get(struct hdac_ext_bus *ebus,
+				struct hdac_ext_link *link);
+int snd_hdac_ext_bus_link_put(struct hdac_ext_bus *ebus,
+				struct hdac_ext_link *link);
+
 /* update register macro */
 #define snd_hdac_updatel(addr, reg, mask, val)		\
 	writel(((readl(addr + reg) & ~(mask)) | (val)), \
diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h
new file mode 100644
index 0000000..fc3a481
--- /dev/null
+++ b/include/sound/hdmi-codec.h
@@ -0,0 +1,100 @@
+/*
+ * hdmi-codec.h - HDMI Codec driver API
+ *
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Author: Jyri Sarha <jsarha@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef __HDMI_CODEC_H__
+#define __HDMI_CODEC_H__
+
+#include <linux/hdmi.h>
+#include <drm/drm_edid.h>
+#include <sound/asoundef.h>
+#include <uapi/sound/asound.h>
+
+/*
+ * Protocol between ASoC cpu-dai and HDMI-encoder
+ */
+struct hdmi_codec_daifmt {
+	enum {
+		HDMI_I2S,
+		HDMI_RIGHT_J,
+		HDMI_LEFT_J,
+		HDMI_DSP_A,
+		HDMI_DSP_B,
+		HDMI_AC97,
+		HDMI_SPDIF,
+	} fmt;
+	unsigned int bit_clk_inv:1;	/* unsigned: set flag reads as 1, not -1 */
+	unsigned int frame_clk_inv:1;
+	unsigned int bit_clk_master:1;
+	unsigned int frame_clk_master:1;
+};
+
+/*
+ * HDMI audio parameters
+ */
+struct hdmi_codec_params {
+	struct hdmi_audio_infoframe cea;
+	struct snd_aes_iec958 iec;
+	int sample_rate;
+	int sample_width;
+	int channels;
+};
+
+struct hdmi_codec_ops {
+	/*
+	 * Called when ASoC starts an audio stream setup.
+	 * Optional
+	 */
+	int (*audio_startup)(struct device *dev);
+
+	/*
+	 * Configures HDMI-encoder for audio stream.
+	 * Mandatory
+	 */
+	int (*hw_params)(struct device *dev,
+			 struct hdmi_codec_daifmt *fmt,
+			 struct hdmi_codec_params *hparms);
+
+	/*
+	 * Shuts down the audio stream.
+	 * Mandatory
+	 */
+	void (*audio_shutdown)(struct device *dev);
+
+	/*
+	 * Mute/unmute HDMI audio stream.
+	 * Optional
+	 */
+	int (*digital_mute)(struct device *dev, bool enable);
+
+	/*
+	 * Provides EDID-Like-Data from connected HDMI device.
+	 * Optional
+	 */
+	int (*get_eld)(struct device *dev, uint8_t *buf, size_t len);
+};
+
+/* HDMI codec initialization data */
+struct hdmi_codec_pdata {
+	const struct hdmi_codec_ops *ops;
+	uint i2s:1;
+	uint spdif:1;
+	int max_i2s_channels;
+};
+
+#define HDMI_CODEC_DRV_NAME "hdmi-audio-codec"
+
+#endif /* __HDMI_CODEC_H__ */
diff --git a/include/sound/pcm_iec958.h b/include/sound/pcm_iec958.h
index 0eed397..36f023a 100644
--- a/include/sound/pcm_iec958.h
+++ b/include/sound/pcm_iec958.h
@@ -6,4 +6,6 @@
 int snd_pcm_create_iec958_consumer(struct snd_pcm_runtime *runtime, u8 *cs,
 	size_t len);
 
+int snd_pcm_create_iec958_consumer_hw_params(struct snd_pcm_hw_params *params,
+					     u8 *cs, size_t len);
 #endif
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 9706946..3101d53 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -100,6 +100,7 @@
 {       .id = snd_soc_dapm_mixer_named_ctl, .name = wname, \
 	SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), \
 	.kcontrol_news = wcontrols, .num_kcontrols = wncontrols}
+/* DEPRECATED: use SND_SOC_DAPM_SUPPLY */
 #define SND_SOC_DAPM_MICBIAS(wname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_micbias, .name = wname, \
 	SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), \
@@ -473,7 +474,7 @@
 	snd_soc_dapm_out_drv,			/* output driver */
 	snd_soc_dapm_adc,			/* analog to digital converter */
 	snd_soc_dapm_dac,			/* digital to analog converter */
-	snd_soc_dapm_micbias,		/* microphone bias (power) */
+	snd_soc_dapm_micbias,		/* microphone bias (power) - DEPRECATED: use snd_soc_dapm_supply */
 	snd_soc_dapm_mic,			/* microphone */
 	snd_soc_dapm_hp,			/* headphones */
 	snd_soc_dapm_spk,			/* speaker */
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 02b4a21..fd7b58a 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1002,7 +1002,7 @@
 	 */
 	const char *platform_name;
 	struct device_node *platform_of_node;
-	int be_id;	/* optional ID for machine driver BE identification */
+	int id;	/* optional ID for machine driver link identification */
 
 	const struct snd_soc_pcm_stream *params;
 	unsigned int num_params;
@@ -1683,6 +1683,9 @@
 int snd_soc_register_dai(struct snd_soc_component *component,
 	struct snd_soc_dai_driver *dai_drv);
 
+struct snd_soc_dai *snd_soc_find_dai(
+	const struct snd_soc_dai_link_component *dlc);
+
 #include <sound/soc-dai.h>
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index aa69253..526fb3d 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -38,22 +38,25 @@
 );
 
 TRACE_EVENT(kvm_vcpu_wakeup,
-	    TP_PROTO(__u64 ns, bool waited),
-	    TP_ARGS(ns, waited),
+	    TP_PROTO(__u64 ns, bool waited, bool valid),
+	    TP_ARGS(ns, waited, valid),
 
 	TP_STRUCT__entry(
 		__field(	__u64,		ns		)
 		__field(	bool,		waited		)
+		__field(	bool,		valid		)
 	),
 
 	TP_fast_assign(
 		__entry->ns		= ns;
 		__entry->waited		= waited;
+		__entry->valid		= valid;
 	),
 
-	TP_printk("%s time %lld ns",
+	TP_printk("%s time %lld ns, polling %s",
 		  __entry->waited ? "wait" : "poll",
-		  __entry->ns)
+		  __entry->ns,
+		  __entry->valid ? "valid" : "invalid")
 );
 
 #if defined(CONFIG_HAVE_KVM_IRQFD)
diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h
index b0a7dd6..adcbef4 100644
--- a/include/uapi/linux/i2c.h
+++ b/include/uapi/linux/i2c.h
@@ -68,14 +68,15 @@
 struct i2c_msg {
 	__u16 addr;	/* slave address			*/
 	__u16 flags;
-#define I2C_M_TEN		0x0010	/* this is a ten bit chip address */
 #define I2C_M_RD		0x0001	/* read data, from slave to master */
-#define I2C_M_STOP		0x8000	/* if I2C_FUNC_PROTOCOL_MANGLING */
-#define I2C_M_NOSTART		0x4000	/* if I2C_FUNC_NOSTART */
-#define I2C_M_REV_DIR_ADDR	0x2000	/* if I2C_FUNC_PROTOCOL_MANGLING */
-#define I2C_M_IGNORE_NAK	0x1000	/* if I2C_FUNC_PROTOCOL_MANGLING */
-#define I2C_M_NO_RD_ACK		0x0800	/* if I2C_FUNC_PROTOCOL_MANGLING */
+					/* I2C_M_RD is guaranteed to be 0x0001! */
+#define I2C_M_TEN		0x0010	/* this is a ten bit chip address */
 #define I2C_M_RECV_LEN		0x0400	/* length will be first received byte */
+#define I2C_M_NO_RD_ACK		0x0800	/* if I2C_FUNC_PROTOCOL_MANGLING */
+#define I2C_M_IGNORE_NAK	0x1000	/* if I2C_FUNC_PROTOCOL_MANGLING */
+#define I2C_M_REV_DIR_ADDR	0x2000	/* if I2C_FUNC_PROTOCOL_MANGLING */
+#define I2C_M_NOSTART		0x4000	/* if I2C_FUNC_NOSTART */
+#define I2C_M_STOP		0x8000	/* if I2C_FUNC_PROTOCOL_MANGLING */
 	__u16 len;		/* msg length				*/
 	__u8 *buf;		/* pointer to msg data			*/
 };
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 840cb99..86eddd6 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -12,6 +12,8 @@
 #ifndef _LINUX_KEYCTL_H
 #define _LINUX_KEYCTL_H
 
+#include <linux/types.h>
+
 /* special process keyring shortcut IDs */
 #define KEY_SPEC_THREAD_KEYRING		-1	/* - key ID for thread-specific keyring */
 #define KEY_SPEC_PROCESS_KEYRING	-2	/* - key ID for process-specific keyring */
@@ -57,5 +59,13 @@
 #define KEYCTL_INSTANTIATE_IOV		20	/* instantiate a partially constructed key */
 #define KEYCTL_INVALIDATE		21	/* invalidate a key */
 #define KEYCTL_GET_PERSISTENT		22	/* get a user's persistent keyring */
+#define KEYCTL_DH_COMPUTE		23	/* Compute Diffie-Hellman values */
+
+/* keyctl structures */
+struct keyctl_dh_params {
+	__s32 private;
+	__s32 prime;
+	__s32 base;
+};
 
 #endif /*  _LINUX_KEYCTL_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7f1f80..05ebf47 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -865,6 +865,7 @@
 #define KVM_CAP_SPAPR_TCE_64 125
 #define KVM_CAP_ARM_PMU_V3 126
 #define KVM_CAP_VCPU_ATTRIBUTES 127
+#define KVM_CAP_MAX_VCPU_ID 128
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 1becea8..4040951 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -670,7 +670,8 @@
 #define PCI_EXT_CAP_ID_SECPCI	0x19	/* Secondary PCIe Capability */
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
-#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PASID
+#define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
+#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_DPC
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -946,4 +947,21 @@
 #define PCI_TPH_CAP_ST_SHIFT	16	/* st table shift */
 #define PCI_TPH_BASE_SIZEOF	12	/* size with no st table */
 
+/* Downstream Port Containment */
+#define PCI_EXP_DPC_CAP			4	/* DPC Capability */
+#define  PCI_EXP_DPC_CAP_RP_EXT		0x20	/* Root Port Extensions for DPC */
+#define  PCI_EXP_DPC_CAP_POISONED_TLP	0x40	/* Poisoned TLP Egress Blocking Supported */
+#define  PCI_EXP_DPC_CAP_SW_TRIGGER	0x80	/* Software Triggering Supported */
+#define  PCI_EXP_DPC_CAP_DL_ACTIVE	0x1000	/* ERR_COR signal on DL_Active supported */
+
+#define PCI_EXP_DPC_CTL			6	/* DPC control */
+#define  PCI_EXP_DPC_CTL_EN_NONFATAL 	0x02	/* Enable trigger on ERR_NONFATAL message */
+#define  PCI_EXP_DPC_CTL_INT_EN 	0x08	/* DPC Interrupt Enable */
+
+#define PCI_EXP_DPC_STATUS		8	/* DPC Status */
+#define  PCI_EXP_DPC_STATUS_TRIGGER	0x01	/* Trigger Status */
+#define  PCI_EXP_DPC_STATUS_INTERRUPT	0x08	/* Interrupt Status */
+
+#define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
+
 #endif /* LINUX_PCI_REGS_H */
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index e513a4e..24da334 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -264,4 +264,7 @@
 /* MVEBU UART */
 #define PORT_MVEBU	114
 
+/* Microchip PIC32 UART */
+#define PORT_PIC32	115
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 67bf49d..609cadb 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -672,7 +672,7 @@
 
 /* global timers (device member) */
 #define SNDRV_TIMER_GLOBAL_SYSTEM	0
-#define SNDRV_TIMER_GLOBAL_RTC		1
+#define SNDRV_TIMER_GLOBAL_RTC		1	/* unused */
 #define SNDRV_TIMER_GLOBAL_HPET		2
 #define SNDRV_TIMER_GLOBAL_HRTIMER	3
 
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index 64b9dea..937c844 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/string.h>
-#include <keys/system_keyring.h>
+#include <linux/verification.h>
 #include <crypto/public_key.h>
 #include "module-internal.h"
 
@@ -80,6 +80,7 @@
 		return -EBADMSG;
 	}
 
-	return system_verify_data(mod, modlen, mod + modlen, sig_len,
-				  VERIFYING_MODULE_SIGNATURE);
+	return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len,
+				      NULL, VERIFYING_MODULE_SIGNATURE,
+				      NULL, NULL);
 }
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index e1e5a35..7002796 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -513,24 +513,17 @@
  * To be fully secure this must be combined with rlimit
  * to limit the stack allocations too.
  */
-static int mode1_syscalls[] = {
+static const int mode1_syscalls[] = {
 	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
 	0, /* null terminated */
 };
 
-#ifdef CONFIG_COMPAT
-static int mode1_syscalls_32[] = {
-	__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
-	0, /* null terminated */
-};
-#endif
-
 static void __secure_computing_strict(int this_syscall)
 {
-	int *syscall_whitelist = mode1_syscalls;
+	const int *syscall_whitelist = mode1_syscalls;
 #ifdef CONFIG_COMPAT
 	if (in_compat_syscall())
-		syscall_whitelist = mode1_syscalls_32;
+		syscall_whitelist = get_compat_mode1_syscalls();
 #endif
 	do {
 		if (*syscall_whitelist == this_syscall)
@@ -915,7 +908,7 @@
 
 	fprog = filter->prog->orig_prog;
 	if (!fprog) {
-		/* This must be a new non-cBPF filter, since we save every
+		/* This must be a new non-cBPF filter, since we save
 		 * every cBPF filter's orig_prog above when
 		 * CONFIG_CHECKPOINT_RESTORE is enabled.
 		 */
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 5c88204..ecaac2c 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -10,6 +10,10 @@
 #include <linux/export.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/string_helpers.h>
 
@@ -534,3 +538,91 @@
 	return p - dst;
 }
 EXPORT_SYMBOL(string_escape_mem);
+
+/*
+ * Return an allocated copy of @src with special characters and double quotes
+ * escaped, safe to log in quotes; NULL if @src is NULL or allocation fails.
+ */
+char *kstrdup_quotable(const char *src, gfp_t gfp)
+{
+	size_t slen, dlen;
+	char *dst;
+	const int flags = ESCAPE_HEX;
+	const char esc[] = "\f\n\r\t\v\a\e\\\"";
+
+	if (!src)
+		return NULL;
+	slen = strlen(src);
+
+	dlen = string_escape_mem(src, slen, NULL, 0, flags, esc);
+	dst = kmalloc(dlen + 1, gfp);
+	if (!dst)
+		return NULL;
+
+	WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen);
+	dst[dlen] = '\0';
+
+	return dst;
+}
+EXPORT_SYMBOL_GPL(kstrdup_quotable);
+
+/*
+ * Returns allocated NUL-terminated string containing process
+ * command line, with inter-argument NULs replaced with spaces,
+ * and other special characters escaped.
+ */
+char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)
+{
+	char *buffer, *quoted;
+	int i, res;
+
+	buffer = kmalloc(PAGE_SIZE, GFP_TEMPORARY);
+	if (!buffer)
+		return NULL;
+
+	res = get_cmdline(task, buffer, PAGE_SIZE - 1);
+	buffer[res] = '\0';
+
+	/* Collapse trailing NULs, leave res pointing to last non-NUL. */
+	while (--res >= 0 && buffer[res] == '\0')
+		;
+
+	/* Replace inter-argument NULs. */
+	for (i = 0; i <= res; i++)
+		if (buffer[i] == '\0')
+			buffer[i] = ' ';
+
+	/* Make sure result is printable. */
+	quoted = kstrdup_quotable(buffer, gfp);
+	kfree(buffer);
+	return quoted;
+}
+EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline);
+
+/*
+ * Returns allocated NUL-terminated string containing pathname,
+ * with special characters escaped, able to be safely logged. If
+ * there is an error, the leading character will be "<".
+ */
+char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
+{
+	char *temp, *pathname;
+
+	if (!file)
+		return kstrdup("<unknown>", gfp);
+
+	/* We add 11 spaces for ' (deleted)' to be appended */
+	temp = kmalloc(PATH_MAX + 11, GFP_TEMPORARY);
+	if (!temp)
+		return kstrdup("<no_memory>", gfp);
+
+	pathname = file_path(file, temp, PATH_MAX + 11);
+	if (IS_ERR(pathname))
+		pathname = kstrdup("<too_long>", gfp);
+	else
+		pathname = kstrdup_quotable(pathname, gfp);
+
+	kfree(temp);
+	return pathname;
+}
+EXPORT_SYMBOL_GPL(kstrdup_quotable_file);
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index c79b85e..8737412 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -281,7 +281,7 @@
 				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
 				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
 				KEY_USR_VIEW | KEY_USR_READ,
-				KEY_ALLOC_NOT_IN_QUOTA, NULL);
+				KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto failed_put_cred;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 28cddc8..1325776 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -677,7 +677,7 @@
 	u32 spot = start;
 
 	while (rc == 0 && spot <= end) {
-		if (((spot & (BITS_PER_LONG - 1)) != 0) &&
+		if (((spot & (BITS_PER_LONG - 1)) == 0) &&
 		    ((end - spot) > BITS_PER_LONG)) {
 			rc = netlbl_catmap_setlong(catmap,
 						   spot,
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 3fb492e..1021b4c 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -965,7 +965,7 @@
 
 	key = key_alloc(&key_type_rxrpc, "x",
 			GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0,
-			KEY_ALLOC_NOT_IN_QUOTA);
+			KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(key)) {
 		_leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
 		return -ENOMEM;
@@ -1012,7 +1012,7 @@
 
 	key = key_alloc(&key_type_rxrpc, keyname,
 			GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
-			KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA);
+			KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(key))
 		return key;
 
diff --git a/samples/Kconfig b/samples/Kconfig
index d54f28c..559a58b 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -76,4 +76,13 @@
 	help
 	  Builds a sample configfs interface.
 
+config SAMPLE_CONNECTOR
+	tristate "Build connector sample -- loadable modules only"
+	depends on CONNECTOR && m
+	help
+	  When enabled, this builds both a sample kernel module for
+	  the connector interface and a user space tool to communicate
+	  with it.
+	  See also Documentation/connector/connector.txt
+
 endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index ad440d6..2e3b523 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -2,4 +2,4 @@
 
 obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \
 			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
-			   configfs/ v4l/
+			   configfs/ connector/ v4l/
diff --git a/Documentation/connector/.gitignore b/samples/connector/.gitignore
similarity index 100%
rename from Documentation/connector/.gitignore
rename to samples/connector/.gitignore
diff --git a/Documentation/connector/Makefile b/samples/connector/Makefile
similarity index 76%
rename from Documentation/connector/Makefile
rename to samples/connector/Makefile
index d98e4df..04b9622 100644
--- a/Documentation/connector/Makefile
+++ b/samples/connector/Makefile
@@ -1,9 +1,9 @@
-ifneq ($(CONFIG_CONNECTOR),)
-obj-m += cn_test.o
-endif
+obj-$(CONFIG_SAMPLE_CONNECTOR) += cn_test.o
 
 # List of programs to build
+ifdef CONFIG_SAMPLE_CONNECTOR
 hostprogs-y := ucon
+endif
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
diff --git a/Documentation/connector/cn_test.c b/samples/connector/cn_test.c
similarity index 100%
rename from Documentation/connector/cn_test.c
rename to samples/connector/cn_test.c
diff --git a/Documentation/connector/ucon.c b/samples/connector/ucon.c
similarity index 100%
rename from Documentation/connector/ucon.c
rename to samples/connector/ucon.c
diff --git a/scripts/docproc.c b/scripts/docproc.c
index e267e621..0a12593 100644
--- a/scripts/docproc.c
+++ b/scripts/docproc.c
@@ -42,8 +42,10 @@
 #include <unistd.h>
 #include <limits.h>
 #include <errno.h>
+#include <getopt.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <time.h>
 
 /* exitstatus is used to keep track of any failing calls to kernel-doc,
  * but execution continues. */
@@ -68,12 +70,23 @@
 #define KERNELDOCPATH "scripts/"
 #define KERNELDOC     "kernel-doc"
 #define DOCBOOK       "-docbook"
+#define RST           "-rst"
 #define LIST          "-list"
 #define FUNCTION      "-function"
 #define NOFUNCTION    "-nofunction"
 #define NODOCSECTIONS "-no-doc-sections"
 #define SHOWNOTFOUND  "-show-not-found"
 
+enum file_format {
+	FORMAT_AUTO,
+	FORMAT_DOCBOOK,
+	FORMAT_RST,
+};
+
+static enum file_format file_format = FORMAT_AUTO;
+
+#define KERNELDOC_FORMAT	(file_format == FORMAT_RST ? RST : DOCBOOK)
+
 static char *srctree, *kernsrctree;
 
 static char **all_list = NULL;
@@ -95,7 +108,7 @@
 
 static void usage (void)
 {
-	fprintf(stderr, "Usage: docproc {doc|depend} file\n");
+	fprintf(stderr, "Usage: docproc [{--docbook|--rst}] {doc|depend} file\n");
 	fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n");
 	fprintf(stderr, "doc: frontend when generating kernel documentation\n");
 	fprintf(stderr, "depend: generate list of files referenced within file\n");
@@ -242,7 +255,7 @@
 /*
  * Document all external or internal functions in a file.
  * Call kernel-doc with following parameters:
- * kernel-doc -docbook -nofunction function_name1 filename
+ * kernel-doc [-docbook|-rst] -nofunction function_name1 filename
  * Function names are obtained from all the src files
  * by find_export_symbols.
  * intfunc uses -nofunction
@@ -263,7 +276,7 @@
 		exit(1);
 	}
 	vec[idx++] = KERNELDOC;
-	vec[idx++] = DOCBOOK;
+	vec[idx++] = KERNELDOC_FORMAT;
 	vec[idx++] = NODOCSECTIONS;
 	for (i=0; i < symfilecnt; i++) {
 		struct symfile * sym = &symfilelist[i];
@@ -275,7 +288,10 @@
 	}
 	vec[idx++]     = filename;
 	vec[idx] = NULL;
-	printf("<!-- %s -->\n", filename);
+	if (file_format == FORMAT_RST)
+		printf(".. %s\n", filename);
+	else
+		printf("<!-- %s -->\n", filename);
 	exec_kernel_doc(vec);
 	fflush(stdout);
 	free(vec);
@@ -294,7 +310,7 @@
 	int i, idx = 0;
 	int startofsym = 1;
 	vec[idx++] = KERNELDOC;
-	vec[idx++] = DOCBOOK;
+	vec[idx++] = KERNELDOC_FORMAT;
 	vec[idx++] = SHOWNOTFOUND;
 
 	/* Split line up in individual parameters preceded by FUNCTION */
@@ -343,7 +359,7 @@
 	free(s);
 
 	vec[0] = KERNELDOC;
-	vec[1] = DOCBOOK;
+	vec[1] = KERNELDOC_FORMAT;
 	vec[2] = SHOWNOTFOUND;
 	vec[3] = FUNCTION;
 	vec[4] = line;
@@ -431,6 +447,32 @@
 }
 
 /*
+ * Terminate s at the first whitespace character, if any, and return a pointer
+ * to the character after it; otherwise return a pointer to the terminating NUL.
+ */
+static char *chomp(char *s)
+{
+	while (*s && !isspace(*s))
+		s++;
+
+	if (*s)
+		*s++ = '\0';
+
+	return s;
+}
+
+/* Return pointer to directive content, or NULL if not a directive. */
+static char *is_directive(char *line)
+{
+	if (file_format == FORMAT_DOCBOOK && line[0] == '!')
+		return line + 1;
+	else if (file_format == FORMAT_RST && !strncmp(line, ".. !", 4))
+		return line + 4;
+
+	return NULL;
+}
+
+/*
  * Parse file, calling action specific functions for:
  * 1) Lines containing !E
  * 2) Lines containing !I
@@ -443,63 +485,75 @@
 static void parse_file(FILE *infile)
 {
 	char line[MAXLINESZ];
-	char * s;
+	char *p, *s;
 	while (fgets(line, MAXLINESZ, infile)) {
-		if (line[0] == '!') {
-			s = line + 2;
-			switch (line[1]) {
-				case 'E':
-					while (*s && !isspace(*s)) s++;
-					*s = '\0';
-					externalfunctions(line+2);
-					break;
-				case 'I':
-					while (*s && !isspace(*s)) s++;
-					*s = '\0';
-					internalfunctions(line+2);
-					break;
-				case 'D':
-					while (*s && !isspace(*s)) s++;
-					*s = '\0';
-					symbolsonly(line+2);
-					break;
-				case 'F':
-					/* filename */
-					while (*s && !isspace(*s)) s++;
-					*s++ = '\0';
-					/* function names */
-					while (isspace(*s))
-						s++;
-					singlefunctions(line +2, s);
-					break;
-				case 'P':
-					/* filename */
-					while (*s && !isspace(*s)) s++;
-					*s++ = '\0';
-					/* DOC: section name */
-					while (isspace(*s))
-						s++;
-					docsection(line + 2, s);
-					break;
-				case 'C':
-					while (*s && !isspace(*s)) s++;
-					*s = '\0';
-					if (findall)
-						findall(line+2);
-					break;
-				default:
-					defaultline(line);
-			}
-		} else {
+		p = is_directive(line);
+		if (!p) {
+			defaultline(line);
+			continue;
+		}
+
+		switch (*p++) {
+		case 'E':
+			chomp(p);
+			externalfunctions(p);
+			break;
+		case 'I':
+			chomp(p);
+			internalfunctions(p);
+			break;
+		case 'D':
+			chomp(p);
+			symbolsonly(p);
+			break;
+		case 'F':
+			/* filename */
+			s = chomp(p);
+			/* function names */
+			while (isspace(*s))
+				s++;
+			singlefunctions(p, s);
+			break;
+		case 'P':
+			/* filename */
+			s = chomp(p);
+			/* DOC: section name */
+			while (isspace(*s))
+				s++;
+			docsection(p, s);
+			break;
+		case 'C':
+			chomp(p);
+			if (findall)
+				findall(p);
+			break;
+		default:
 			defaultline(line);
 		}
 	}
 	fflush(stdout);
 }
 
+/*
+ * Is this a reStructuredText template?  Answer the question by seeing if its
+ * name ends in ".rst".
+ */
+static int is_rst(const char *file)
+{
+	char *dot = strrchr(file, '.');
+
+	return dot && !strcmp(dot + 1, "rst");
+}
+
+enum opts {
+	OPT_DOCBOOK,
+	OPT_RST,
+	OPT_HELP,
+};
 
 int main(int argc, char *argv[])
 {
+	const char *subcommand, *filename;
 	FILE * infile;
 	int i;
 
@@ -509,19 +563,66 @@
 	kernsrctree = getenv("KBUILD_SRC");
 	if (!kernsrctree || !*kernsrctree)
 		kernsrctree = srctree;
-	if (argc != 3) {
+
+	for (;;) {
+		int c;
+		struct option opts[] = {
+			{ "docbook",	no_argument, NULL, OPT_DOCBOOK },
+			{ "rst",	no_argument, NULL, OPT_RST },
+			{ "help",	no_argument, NULL, OPT_HELP },
+			{}
+		};
+
+		c = getopt_long_only(argc, argv, "", opts, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case OPT_DOCBOOK:
+			file_format = FORMAT_DOCBOOK;
+			break;
+		case OPT_RST:
+			file_format = FORMAT_RST;
+			break;
+		case OPT_HELP:
+			usage();
+			return 0;
+		default:
+		case '?':
+			usage();
+			return 1;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 2) {
 		usage();
 		exit(1);
 	}
+
+	subcommand = argv[0];
+	filename = argv[1];
+
+	if (file_format == FORMAT_AUTO)
+		file_format = is_rst(filename) ? FORMAT_RST : FORMAT_DOCBOOK;
+
 	/* Open file, exit on error */
-	infile = fopen(argv[2], "r");
+	infile = fopen(filename, "r");
 	if (infile == NULL) {
 		fprintf(stderr, "docproc: ");
-		perror(argv[2]);
+		perror(filename);
 		exit(2);
 	}
 
-	if (strcmp("doc", argv[1]) == 0) {
+	if (strcmp("doc", subcommand) == 0) {
+		if (file_format == FORMAT_RST) {
+			time_t t = time(NULL);
+			printf(".. generated from %s by docproc %s\n",
+			       filename, ctime(&t));
+		}
+
 		/* Need to do this in two passes.
 		 * First pass is used to collect all symbols exported
 		 * in the various files;
@@ -557,10 +658,10 @@
 			fprintf(stderr, "Warning: didn't use docs for %s\n",
 				all_list[i]);
 		}
-	} else if (strcmp("depend", argv[1]) == 0) {
+	} else if (strcmp("depend", subcommand) == 0) {
 		/* Create first part of dependency chain
 		 * file.tmpl */
-		printf("%s\t", argv[2]);
+		printf("%s\t", filename);
 		defaultline       = noaction;
 		internalfunctions = adddep;
 		externalfunctions = adddep;
@@ -571,7 +672,7 @@
 		parse_file(infile);
 		printf("\n");
 	} else {
-		fprintf(stderr, "Unknown option: %s\n", argv[1]);
+		fprintf(stderr, "Unknown option: %s\n", subcommand);
 		exit(1);
 	}
 	fclose(infile);
diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index f3d3fb4..b8c7b29 100755
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -188,7 +188,7 @@
 	$cont = 0;
 
 	# collect any Kconfig sources
-	if (/^source\s*"(.*)"/) {
+	if (/^source\s+"?([^"]+)/) {
 	    my $kconfig = $1;
 	    # prevent reading twice.
 	    if (!defined($read_kconfigs{$kconfig})) {
@@ -237,7 +237,7 @@
 	    }
 
 	# configs without prompts must be selected
-	} elsif ($state ne "NONE" && /^\s*tristate\s\S/) {
+	} elsif ($state ne "NONE" && /^\s*(tristate\s+\S|prompt\b)/) {
 	    # note if the config has a prompt
 	    $prompts{$config} = 1;
 
@@ -256,8 +256,8 @@
 
 	    $iflevel-- if ($iflevel);
 
-	# stop on "help"
-	} elsif (/^\s*help\s*$/) {
+	# stop on "help" and keywords that end a menu entry
+	} elsif (/^\s*(---)?help(---)?\s*$/ || /^(comment|choice|menu)\b/) {
 	    $state = "NONE";
 	}
     }
@@ -454,7 +454,7 @@
 	    $p =~ s/^[^$valid]*[$valid]+//;
 
 	    # We only need to process if the depend config is a module
-	    if (!defined($orig_configs{$conf}) || !$orig_configs{conf} eq "m") {
+	    if (!defined($orig_configs{$conf}) || $orig_configs{$conf} eq "y") {
 		next;
 	    }
 
@@ -610,6 +610,40 @@
 	next;
     }
 
+    if (/CONFIG_MODULE_SIG_KEY="(.+)"/) {
+        my $orig_cert = $1;
+        my $default_cert = "certs/signing_key.pem";
+
+        # Check that the logic in this script still matches the one in Kconfig
+        if (!defined($depends{"MODULE_SIG_KEY"}) ||
+            $depends{"MODULE_SIG_KEY"} !~ /"\Q$default_cert\E"/) {
+            print STDERR "WARNING: MODULE_SIG_KEY assertion failure, ",
+                "update needed to ", __FILE__, " line ", __LINE__, "\n";
+            print;
+        } elsif ($orig_cert ne $default_cert && ! -f $orig_cert) {
+            print STDERR "Module signature verification enabled but ",
+                "module signing key \"$orig_cert\" not found. Resetting ",
+                "signing key to default value.\n";
+            print "CONFIG_MODULE_SIG_KEY=\"$default_cert\"\n";
+        } else {
+            print;
+        }
+        next;
+    }
+
+    if (/CONFIG_SYSTEM_TRUSTED_KEYS="(.+)"/) {
+        my $orig_keys = $1;
+
+        if (! -f $orig_keys) {
+            print STDERR "System keyring enabled but keys \"$orig_keys\" ",
+                "not found. Resetting keys to default value.\n";
+            print "CONFIG_SYSTEM_TRUSTED_KEYS=\"\"\n";
+        } else {
+            print;
+        }
+        next;
+    }
+
     if (/^(CONFIG.*)=(m|y)/) {
 	if (defined($configs{$1})) {
 	    if ($localyesconfig) {
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index c37255b..2fc8fad 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -39,41 +39,44 @@
 # 25/07/2012 - Added support for HTML5
 # -- Dan Luedtke <mail@danrl.de>
 
-#
-# This will read a 'c' file and scan for embedded comments in the
-# style of gnome comments (+minor extensions - see below).
-#
+sub usage {
+    my $message = <<"EOF";
+Usage: $0 [OPTION ...] FILE ...
 
-# Note: This only supports 'c'.
+Read C language source or header FILEs, extract embedded documentation comments,
+and print formatted documentation to standard output.
 
-# usage:
-# kernel-doc [ -docbook | -html | -html5 | -text | -man | -list ]
-#            [ -no-doc-sections ]
-#            [ -function funcname [ -function funcname ...] ]
-#            c file(s)s > outputfile
-# or
-#            [ -nofunction funcname [ -function funcname ...] ]
-#            c file(s)s > outputfile
-#
-#  Set output format using one of -docbook -html -html5 -text or -man.
-#  Default is man.
-#  The -list format is for internal use by docproc.
-#
-#  -no-doc-sections
-#	Do not output DOC: sections
-#
-#  -function funcname
-#	If set, then only generate documentation for the given function(s) or
-#	DOC: section titles.  All other functions and DOC: sections are ignored.
-#
-#  -nofunction funcname
-#	If set, then only generate documentation for the other function(s)/DOC:
-#	sections. Cannot be used together with -function (yes, that's a bug --
-#	perl hackers can fix it 8))
-#
-#  c files - list of 'c' files to process
-#
-#  All output goes to stdout, with errors to stderr.
+The documentation comments are identified by "/**" opening comment mark. See
+Documentation/kernel-doc-nano-HOWTO.txt for the documentation comment syntax.
+
+Output format selection (mutually exclusive):
+  -docbook		Output DocBook format.
+  -html			Output HTML format.
+  -html5		Output HTML5 format.
+  -list			Output symbol list format. This is for use by docproc.
+  -man			Output troff manual page format. This is the default.
+  -rst			Output reStructuredText format.
+  -text			Output plain text format.
+
+Output selection (mutually exclusive):
+  -function NAME	Only output documentation for the given function(s)
+			or DOC: section title(s). All other functions and DOC:
+			sections are ignored. May be specified multiple times.
+  -nofunction NAME	Do NOT output documentation for the given function(s);
+			only output documentation for the other functions and
+			DOC: sections. May be specified multiple times.
+
+Output selection modifiers:
+  -no-doc-sections	Do not output DOC: sections.
+
+Other parameters:
+  -v			Verbose output, more warnings and other information.
+  -h			Print this help.
+
+EOF
+    print $message;
+    exit 1;
+}
 
 #
 # format of comments.
@@ -201,6 +204,8 @@
 my $type_struct = '\&((struct\s*)*[_\w]+)';
 my $type_struct_xml = '\\&amp;((struct\s*)*[_\w]+)';
 my $type_env = '(\$\w+)';
+my $type_enum_full = '\&(enum)\s*([_\w]+)';
+my $type_struct_full = '\&(struct)\s*([_\w]+)';
 
 # Output conversion substitutions.
 #  One for each output format
@@ -266,6 +271,17 @@
 		      );
 my $blankline_text = "";
 
+# rst-mode
+my @highlights_rst = (
+                       [$type_constant, "``\$1``"],
+                       [$type_func, "\\:c\\:func\\:`\$1`"],
+                       [$type_struct_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
+                       [$type_enum_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
+                       [$type_struct, "\\:c\\:type\\:`struct \$1 <\$1>`"],
+                       [$type_param, "**\$1**"]
+		      );
+my $blankline_rst = "\n";
+
 # list mode
 my @highlights_list = (
                        [$type_constant, "\$1"],
@@ -402,6 +418,10 @@
 	$output_mode = "text";
 	@highlights = @highlights_text;
 	$blankline = $blankline_text;
+    } elsif ($cmd eq "-rst") {
+	$output_mode = "rst";
+	@highlights = @highlights_rst;
+	$blankline = $blankline_rst;
     } elsif ($cmd eq "-docbook") {
 	$output_mode = "xml";
 	@highlights = @highlights_xml;
@@ -437,17 +457,6 @@
 
 # continue execution near EOF;
 
-sub usage {
-    print "Usage: $0 [ -docbook | -html | -html5 | -text | -man | -list ]\n";
-    print "         [ -no-doc-sections ]\n";
-    print "         [ -function funcname [ -function funcname ...] ]\n";
-    print "         [ -nofunction funcname [ -nofunction funcname ...] ]\n";
-    print "         [ -v ]\n";
-    print "         c source file(s) > outputfile\n";
-    print "         -v : verbose output, more warnings & other info listed\n";
-    exit 1;
-}
-
 # get kernel version from env
 sub get_kernel_version() {
     my $version = 'unknown kernel version';
@@ -1713,6 +1722,208 @@
     }
 }
 
+##
+# output in reStructuredText
+#
+
+#
+# This could use some work; it's used to output the DOC: sections, and
+# starts by putting out the name of the doc section itself, but that tends
+# to duplicate a header already in the template file.
+#
+sub output_blockhead_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+
+    foreach $section (@{$args{'sectionlist'}}) {
+	print "**$section**\n\n";
+	output_highlight_rst($args{'sections'}{$section});
+	print "\n";
+    }
+}
+
+sub output_highlight_rst {
+    my $contents = join "\n",@_;
+    my $line;
+
+    # undo the evil effects of xml_escape() earlier
+    $contents = xml_unescape($contents);
+
+    eval $dohighlight;
+    die $@ if $@;
+
+    foreach $line (split "\n", $contents) {
+	if ($line eq "") {
+	    print $lineprefix, $blankline;
+	} else {
+	    $line =~ s/\\\\\\/\&/g;
+	    print $lineprefix, $line;
+	}
+	print "\n";
+    }
+}
+
+sub output_function_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter, $section);
+    my $start;
+
+    print ".. c:function:: ";
+    if ($args{'functiontype'} ne "") {
+	$start = $args{'functiontype'} . " " . $args{'function'} . " (";
+    } else {
+	$start = $args{'function'} . " (";
+    }
+    print $start;
+
+    my $count = 0;
+    foreach my $parameter (@{$args{'parameterlist'}}) {
+	if ($count ne 0) {
+	    print ", ";
+	}
+	$count++;
+	$type = $args{'parametertypes'}{$parameter};
+	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	    # pointer-to-function
+	    print $1 . $parameter . ") (" . $2;
+	} else {
+	    print $type . " " . $parameter;
+	}
+    }
+    print ")\n\n    " . $args{'purpose'} . "\n\n";
+
+    print ":Parameters:\n\n";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	my $parameter_name = $parameter;
+	#$parameter_name =~ s/\[.*//;
+	$type = $args{'parametertypes'}{$parameter};
+
+	if ($type ne "") {
+	    print "      ``$type $parameter``\n";
+	} else {
+	    print "      ``$parameter``\n";
+	}
+	if ($args{'parameterdescs'}{$parameter_name} ne $undescribed) {
+	    my $oldprefix = $lineprefix;
+	    $lineprefix = "        ";
+	    output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+	    $lineprefix = $oldprefix;
+	} else {
+	    print "\n        _undescribed_\n";
+	}
+	print "\n";
+    }
+    output_section_rst(@_);
+}
+
+sub output_section_rst(%) {
+    my %args = %{$_[0]};
+    my $section;
+    my $oldprefix = $lineprefix;
+    $lineprefix = "        ";
+
+    foreach $section (@{$args{'sectionlist'}}) {
+	print ":$section:\n\n";
+	output_highlight_rst($args{'sections'}{$section});
+	print "\n";
+    }
+    print "\n";
+    $lineprefix = $oldprefix;
+}
+
+sub output_enum_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter);
+    my $count;
+    my $name = "enum " . $args{'enum'};
+
+    print "\n\n.. c:type:: " . $name . "\n\n";
+    print "    " . $args{'purpose'} . "\n\n";
+
+    print "..\n\n:Constants:\n\n";
+    my $oldprefix = $lineprefix;
+    $lineprefix = "    ";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	print "  `$parameter`\n";
+	if ($args{'parameterdescs'}{$parameter} ne $undescribed) {
+	    output_highlight_rst($args{'parameterdescs'}{$parameter});
+	} else {
+	    print "    undescribed\n";
+	}
+	print "\n";
+    }
+    $lineprefix = $oldprefix;
+    output_section_rst(@_);
+}
+
+sub output_typedef_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter);
+    my $count;
+    my $name = "typedef " . $args{'typedef'};
+
+    ### FIXME: should the name below contain "typedef" or not?
+    print "\n\n.. c:type:: " . $name . "\n\n";
+    print "    " . $args{'purpose'} . "\n\n";
+
+    output_section_rst(@_);
+}
+
+sub output_struct_rst(%) {
+    my %args = %{$_[0]};
+    my ($parameter);
+    my $name = $args{'type'} . " " . $args{'struct'};
+
+    print "\n\n.. c:type:: " . $name . "\n\n";
+    print "    " . $args{'purpose'} . "\n\n";
+
+    print ":Definition:\n\n";
+    print " ::\n\n";
+    print "  " . $args{'type'} . " " . $args{'struct'} . " {\n";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	if ($parameter =~ /^#/) {
+	    print "    " . "$parameter\n";
+	    next;
+	}
+
+	my $parameter_name = $parameter;
+	$parameter_name =~ s/\[.*//;
+
+	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+	$type = $args{'parametertypes'}{$parameter};
+	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	    # pointer-to-function
+	    print "    $1 $parameter) ($2);\n";
+	} elsif ($type =~ m/^(.*?)\s*(:.*)/) {
+	    # bitfield
+	    print "    $1 $parameter$2;\n";
+	} else {
+	    print "    " . $type . " " . $parameter . ";\n";
+	}
+    }
+    print "  };\n\n";
+
+    print ":Members:\n\n";
+    foreach $parameter (@{$args{'parameterlist'}}) {
+	($parameter =~ /^#/) && next;
+
+	my $parameter_name = $parameter;
+	$parameter_name =~ s/\[.*//;
+
+	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
+	$type = $args{'parametertypes'}{$parameter};
+	print "      `$type $parameter`" . "\n";
+	my $oldprefix = $lineprefix;
+	$lineprefix = "        ";
+	output_highlight_rst($args{'parameterdescs'}{$parameter_name});
+	$lineprefix = $oldprefix;
+	print "\n";
+    }
+    print "\n";
+    output_section_rst(@_);
+}
+
+
 ## list mode output functions
 
 sub output_function_list(%) {
@@ -2414,6 +2625,18 @@
 	return $text;
 }
 
+# xml_unescape: reverse the effects of xml_escape
+sub xml_unescape($) {
+	my $text = shift;
+	if (($output_mode eq "text") || ($output_mode eq "man")) {
+		return $text;
+	}
+	$text =~ s/\\\\\\amp;/\&/g;
+	$text =~ s/\\\\\\lt;/</g;
+	$text =~ s/\\\\\\gt;/>/g;
+	return $text;
+}
+
 # convert local escape strings to html
 # local escape strings look like:  '\\\\menmonic:' (that's 4 backslashes)
 sub local_unescape($) {
diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
index 7bfe9fa..d135882 100755
--- a/scripts/ld-version.sh
+++ b/scripts/ld-version.sh
@@ -5,6 +5,6 @@
 	gsub(".*version ", "");
 	gsub("-.*", "");
 	split($1,a, ".");
-	print a[1]*100000000 + a[2]*1000000 + a[3]*10000 + a[4]*100 + a[5];
+	print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
 	exit
 	}
diff --git a/security/Kconfig b/security/Kconfig
index e4523789..176758c 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -122,6 +122,7 @@
 source security/smack/Kconfig
 source security/tomoyo/Kconfig
 source security/apparmor/Kconfig
+source security/loadpin/Kconfig
 source security/yama/Kconfig
 
 source security/integrity/Kconfig
diff --git a/security/Makefile b/security/Makefile
index c9bfbc8..f2d71cd 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -8,6 +8,7 @@
 subdir-$(CONFIG_SECURITY_TOMOYO)        += tomoyo
 subdir-$(CONFIG_SECURITY_APPARMOR)	+= apparmor
 subdir-$(CONFIG_SECURITY_YAMA)		+= yama
+subdir-$(CONFIG_SECURITY_LOADPIN)	+= loadpin
 
 # always enable default capabilities
 obj-y					+= commoncap.o
@@ -22,6 +23,7 @@
 obj-$(CONFIG_SECURITY_TOMOYO)		+= tomoyo/
 obj-$(CONFIG_SECURITY_APPARMOR)		+= apparmor/
 obj-$(CONFIG_SECURITY_YAMA)		+= yama/
+obj-$(CONFIG_SECURITY_LOADPIN)		+= loadpin/
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
 
 # Object integrity file lists
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index 979be65..da95658 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -35,7 +35,6 @@
 	default n
         select ASYMMETRIC_KEY_TYPE
         select ASYMMETRIC_PUBLIC_KEY_SUBTYPE
-        select PUBLIC_KEY_ALGO_RSA
         select CRYPTO_RSA
         select X509_CERTIFICATE_PARSER
 	help
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 8ef1511..4304372 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -18,6 +18,8 @@
 #include <linux/cred.h>
 #include <linux/key-type.h>
 #include <linux/digsig.h>
+#include <crypto/public_key.h>
+#include <keys/system_keyring.h>
 
 #include "integrity.h"
 
@@ -40,6 +42,12 @@
 static bool init_keyring __initdata;
 #endif
 
+#ifdef CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
+#define restrict_link_to_ima restrict_link_by_builtin_and_secondary_trusted
+#else
+#define restrict_link_to_ima restrict_link_by_builtin_trusted
+#endif
+
 int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
 			    const char *digest, int digestlen)
 {
@@ -83,10 +91,9 @@
 				    ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
 				     KEY_USR_VIEW | KEY_USR_READ |
 				     KEY_USR_WRITE | KEY_USR_SEARCH),
-				    KEY_ALLOC_NOT_IN_QUOTA, NULL);
-	if (!IS_ERR(keyring[id]))
-		set_bit(KEY_FLAG_TRUSTED_ONLY, &keyring[id]->flags);
-	else {
+				    KEY_ALLOC_NOT_IN_QUOTA,
+				    restrict_link_to_ima, NULL);
+	if (IS_ERR(keyring[id])) {
 		err = PTR_ERR(keyring[id]);
 		pr_info("Can't allocate %s keyring (%d)\n",
 			keyring_name[id], err);
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index e54a8a8..5487827 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -155,23 +155,33 @@
 
 	   This option is deprecated in favor of INTEGRITY_TRUSTED_KEYRING
 
-config IMA_MOK_KEYRING
-	bool "Create IMA machine owner keys (MOK) and blacklist keyrings"
+config IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
+	bool "Permit keys validly signed by a built-in or secondary CA cert (EXPERIMENTAL)"
+	depends on SYSTEM_TRUSTED_KEYRING
+	depends on SECONDARY_TRUSTED_KEYRING
+	depends on INTEGRITY_ASYMMETRIC_KEYS
+	select INTEGRITY_TRUSTED_KEYRING
+	default n
+	help
+	  Keys may be added to the IMA or IMA blacklist keyrings, if the
+	  key is validly signed by a CA cert in the system built-in or
+	  secondary trusted keyrings.
+
+	  Intermediate keys between those the kernel has compiled in and the
+	  IMA keys to be added may be added to the system secondary keyring,
+	  provided they are validly signed by a key already resident in the
+	  built-in or secondary trusted keyrings.
+
+config IMA_BLACKLIST_KEYRING
+	bool "Create IMA machine owner blacklist keyrings (EXPERIMENTAL)"
 	depends on SYSTEM_TRUSTED_KEYRING
 	depends on IMA_TRUSTED_KEYRING
 	default n
 	help
-	   This option creates IMA MOK and blacklist keyrings.  IMA MOK is an
-	   intermediate keyring that sits between .system and .ima keyrings,
-	   effectively forming a simple CA hierarchy.  To successfully import a
-	   key into .ima_mok it must be signed by a key which CA is in .system
-	   keyring.  On turn any key that needs to go in .ima keyring must be
-	   signed by CA in either .system or .ima_mok keyrings. IMA MOK is empty
-	   at kernel boot.
-
-	   IMA blacklist keyring contains all revoked IMA keys.  It is consulted
-	   before any other keyring.  If the search is successful the requested
-	   operation is rejected and error is returned to the caller.
+	   This option creates an IMA blacklist keyring, which contains all
+	   revoked IMA keys.  It is consulted before any other keyring.  If
+	   the search is successful the requested operation is rejected and
+	   an error is returned to the caller.
 
 config IMA_LOAD_X509
 	bool "Load X509 certificate onto the '.ima' trusted keyring"
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile
index a8539f9..9aeaeda 100644
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -8,4 +8,4 @@
 ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
 	 ima_policy.o ima_template.o ima_template_lib.o
 ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
-obj-$(CONFIG_IMA_MOK_KEYRING) += ima_mok.o
+obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 6b4694a..1bcbc12 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -275,6 +275,11 @@
 		     xattr_value->type != EVM_IMA_XATTR_DIGSIG)) {
 			if (!ima_fix_xattr(dentry, iint))
 				status = INTEGRITY_PASS;
+		} else if ((inode->i_size == 0) &&
+			   (iint->flags & IMA_NEW_FILE) &&
+			   (xattr_value &&
+			    xattr_value->type == EVM_IMA_XATTR_DIGSIG)) {
+			status = INTEGRITY_PASS;
 		}
 		integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename,
 				    op, cause, rc, 0);
@@ -328,7 +333,7 @@
 	if (iint) {
 		iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED |
 				 IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK |
-				 IMA_ACTION_FLAGS);
+				 IMA_ACTION_RULE_FLAGS);
 		if (must_appraise)
 			iint->flags |= IMA_APPRAISE;
 	}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 391f417..68b26c3 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -246,7 +246,8 @@
 		ima_audit_measurement(iint, pathname);
 
 out_digsig:
-	if ((mask & MAY_WRITE) && (iint->flags & IMA_DIGSIG))
+	if ((mask & MAY_WRITE) && (iint->flags & IMA_DIGSIG) &&
+	     !(iint->flags & IMA_NEW_FILE))
 		rc = -EACCES;
 	kfree(xattr_value);
 out_free:
@@ -316,6 +317,28 @@
 EXPORT_SYMBOL_GPL(ima_file_check);
 
 /**
+ * ima_post_path_mknod - mark as a new inode
+ * @dentry: newly created dentry
+ *
+ * Mark files created via the mknodat syscall as new, so that the
+ * file data can be written later.
+ */
+void ima_post_path_mknod(struct dentry *dentry)
+{
+	struct integrity_iint_cache *iint;
+	struct inode *inode = dentry->d_inode;
+	int must_appraise;
+
+	must_appraise = ima_must_appraise(inode, MAY_ACCESS, FILE_CHECK);
+	if (!must_appraise)
+		return;
+
+	iint = integrity_inode_get(inode);
+	if (iint)
+		iint->flags |= IMA_NEW_FILE;
+}
+
+/**
  * ima_read_file - pre-measure/appraise hook decision based on policy
  * @file: pointer to the file to be measured/appraised/audit
  * @read_id: caller identifier
diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c
index 676885e..74a27995 100644
--- a/security/integrity/ima/ima_mok.c
+++ b/security/integrity/ima/ima_mok.c
@@ -17,38 +17,29 @@
 #include <linux/cred.h>
 #include <linux/err.h>
 #include <linux/init.h>
-#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
 
 
-struct key *ima_mok_keyring;
 struct key *ima_blacklist_keyring;
 
 /*
- * Allocate the IMA MOK and blacklist keyrings
+ * Allocate the IMA blacklist keyring
  */
 __init int ima_mok_init(void)
 {
-	pr_notice("Allocating IMA MOK and blacklist keyrings.\n");
-
-	ima_mok_keyring = keyring_alloc(".ima_mok",
-			      KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
-			      (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-			      KEY_USR_VIEW | KEY_USR_READ |
-			      KEY_USR_WRITE | KEY_USR_SEARCH,
-			      KEY_ALLOC_NOT_IN_QUOTA, NULL);
+	pr_notice("Allocating IMA blacklist keyring.\n");
 
 	ima_blacklist_keyring = keyring_alloc(".ima_blacklist",
 				KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
 				(KEY_POS_ALL & ~KEY_POS_SETATTR) |
 				KEY_USR_VIEW | KEY_USR_READ |
 				KEY_USR_WRITE | KEY_USR_SEARCH,
-				KEY_ALLOC_NOT_IN_QUOTA, NULL);
+				KEY_ALLOC_NOT_IN_QUOTA,
+				restrict_link_by_builtin_trusted, NULL);
 
-	if (IS_ERR(ima_mok_keyring) || IS_ERR(ima_blacklist_keyring))
-		panic("Can't allocate IMA MOK or blacklist keyrings.");
-	set_bit(KEY_FLAG_TRUSTED_ONLY, &ima_mok_keyring->flags);
+	if (IS_ERR(ima_blacklist_keyring))
+		panic("Can't allocate IMA blacklist keyring.");
 
-	set_bit(KEY_FLAG_TRUSTED_ONLY, &ima_blacklist_keyring->flags);
 	set_bit(KEY_FLAG_KEEP, &ima_blacklist_keyring->flags);
 	return 0;
 }
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index e08935c..90bc57d 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -28,6 +28,7 @@
 
 /* iint cache flags */
 #define IMA_ACTION_FLAGS	0xff000000
+#define IMA_ACTION_RULE_FLAGS	0x06000000
 #define IMA_DIGSIG		0x01000000
 #define IMA_DIGSIG_REQUIRED	0x02000000
 #define IMA_PERMIT_DIRECTIO	0x04000000
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index fe4d74e..f826e87 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -41,6 +41,10 @@
 	bool "Large payload keys"
 	depends on KEYS
 	depends on TMPFS
+	select CRYPTO
+	select CRYPTO_AES
+	select CRYPTO_ECB
+	select CRYPTO_RNG
 	help
 	  This option provides support for holding large keys within the kernel
 	  (for example Kerberos ticket caches).  The data may be stored out to
@@ -81,3 +85,14 @@
 	  Userspace only ever sees/stores encrypted blobs.
 
 	  If you are unsure as to whether this is required, answer N.
+
+config KEY_DH_OPERATIONS
+       bool "Diffie-Hellman operations on retained keys"
+       depends on KEYS
+       select MPILIB
+       help
+	 This option provides support for calculating Diffie-Hellman
+	 public keys and shared secrets using values stored as keys
+	 in the kernel.
+
+	 If you are unsure as to whether this is required, answer N.
diff --git a/security/keys/Makefile b/security/keys/Makefile
index dfb3a7b..1fd4a16 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -19,6 +19,7 @@
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
 obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
+obj-$(CONFIG_KEY_DH_OPERATIONS) += dh.o
 
 #
 # Key types
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index c721e39..9e443fc 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -14,8 +14,10 @@
 #include <linux/file.h>
 #include <linux/shmem_fs.h>
 #include <linux/err.h>
+#include <linux/scatterlist.h>
 #include <keys/user-type.h>
 #include <keys/big_key-type.h>
+#include <crypto/rng.h>
 
 /*
  * Layout of key payload words.
@@ -28,6 +30,14 @@
 };
 
 /*
+ * Crypto operation with big_key data
+ */
+enum big_key_op {
+	BIG_KEY_ENC,
+	BIG_KEY_DEC,
+};
+
+/*
  * If the data is under this limit, there's no point creating a shm file to
  * hold it as the permanently resident metadata for the shmem fs will be at
  * least as large as the data.
@@ -35,6 +45,11 @@
 #define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
 
 /*
+ * Key size for big_key data encryption
+ */
+#define ENC_KEY_SIZE	16
+
+/*
  * big_key defined keys take an arbitrary string as the description and an
  * arbitrary blob of data as the payload
  */
@@ -50,12 +65,62 @@
 };
 
 /*
+ * Crypto names for big_key data encryption
+ */
+static const char big_key_rng_name[] = "stdrng";
+static const char big_key_alg_name[] = "ecb(aes)";
+
+/*
+ * Crypto algorithms for big_key data encryption
+ */
+static struct crypto_rng *big_key_rng;
+static struct crypto_blkcipher *big_key_blkcipher;
+
+/*
+ * Generate random key to encrypt big_key data
+ */
+static inline int big_key_gen_enckey(u8 *key)
+{
+	return crypto_rng_get_bytes(big_key_rng, key, ENC_KEY_SIZE);
+}
+
+/*
+ * Encrypt/decrypt big_key data
+ */
+static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
+{
+	int ret = -EINVAL;
+	struct scatterlist sgio;
+	struct blkcipher_desc desc;
+
+	if (crypto_blkcipher_setkey(big_key_blkcipher, key, ENC_KEY_SIZE)) {
+		ret = -EAGAIN;
+		goto error;
+	}
+
+	desc.flags = 0;
+	desc.tfm = big_key_blkcipher;
+
+	sg_init_one(&sgio, data, datalen);
+
+	if (op == BIG_KEY_ENC)
+		ret = crypto_blkcipher_encrypt(&desc, &sgio, &sgio, datalen);
+	else
+		ret = crypto_blkcipher_decrypt(&desc, &sgio, &sgio, datalen);
+
+error:
+	return ret;
+}
+
+/*
  * Preparse a big key
  */
 int big_key_preparse(struct key_preparsed_payload *prep)
 {
 	struct path *path = (struct path *)&prep->payload.data[big_key_path];
 	struct file *file;
+	u8 *enckey;
+	u8 *data = NULL;
 	ssize_t written;
 	size_t datalen = prep->datalen;
 	int ret;
@@ -73,16 +138,43 @@
 		/* Create a shmem file to store the data in.  This will permit the data
 		 * to be swapped out if needed.
 		 *
-		 * TODO: Encrypt the stored data with a temporary key.
+		 * File content is encrypted with a randomly generated key.
 		 */
-		file = shmem_kernel_file_setup("", datalen, 0);
-		if (IS_ERR(file)) {
-			ret = PTR_ERR(file);
+		size_t enclen = ALIGN(datalen, crypto_blkcipher_blocksize(big_key_blkcipher));
+
+		/* prepare aligned data to encrypt */
+		data = kmalloc(enclen, GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+
+		memcpy(data, prep->data, datalen);
+		memset(data + datalen, 0x00, enclen - datalen);
+
+		/* generate random key */
+		enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
+		if (!enckey) {
+			ret = -ENOMEM;
 			goto error;
 		}
 
-		written = kernel_write(file, prep->data, prep->datalen, 0);
-		if (written != datalen) {
+		ret = big_key_gen_enckey(enckey);
+		if (ret)
+			goto err_enckey;
+
+		/* encrypt aligned data */
+		ret = big_key_crypt(BIG_KEY_ENC, data, enclen, enckey);
+		if (ret)
+			goto err_enckey;
+
+		/* save aligned data to file */
+		file = shmem_kernel_file_setup("", enclen, 0);
+		if (IS_ERR(file)) {
+			ret = PTR_ERR(file);
+			goto err_enckey;
+		}
+
+		written = kernel_write(file, data, enclen, 0);
+		if (written != enclen) {
 			ret = written;
 			if (written >= 0)
 				ret = -ENOMEM;
@@ -92,12 +184,15 @@
 		/* Pin the mount and dentry to the key so that we can open it again
 		 * later
 		 */
+		prep->payload.data[big_key_data] = enckey;
 		*path = file->f_path;
 		path_get(path);
 		fput(file);
+		kfree(data);
 	} else {
 		/* Just store the data in a buffer */
 		void *data = kmalloc(datalen, GFP_KERNEL);
+
 		if (!data)
 			return -ENOMEM;
 
@@ -108,7 +203,10 @@
 
 err_fput:
 	fput(file);
+err_enckey:
+	kfree(enckey);
 error:
+	kfree(data);
 	return ret;
 }
 
@@ -119,10 +217,10 @@
 {
 	if (prep->datalen > BIG_KEY_FILE_THRESHOLD) {
 		struct path *path = (struct path *)&prep->payload.data[big_key_path];
+
 		path_put(path);
-	} else {
-		kfree(prep->payload.data[big_key_data]);
 	}
+	kfree(prep->payload.data[big_key_data]);
 }
 
 /*
@@ -147,15 +245,15 @@
 {
 	size_t datalen = (size_t)key->payload.data[big_key_len];
 
-	if (datalen) {
+	if (datalen > BIG_KEY_FILE_THRESHOLD) {
 		struct path *path = (struct path *)&key->payload.data[big_key_path];
+
 		path_put(path);
 		path->mnt = NULL;
 		path->dentry = NULL;
-	} else {
-		kfree(key->payload.data[big_key_data]);
-		key->payload.data[big_key_data] = NULL;
 	}
+	kfree(key->payload.data[big_key_data]);
+	key->payload.data[big_key_data] = NULL;
 }
 
 /*
@@ -188,17 +286,41 @@
 	if (datalen > BIG_KEY_FILE_THRESHOLD) {
 		struct path *path = (struct path *)&key->payload.data[big_key_path];
 		struct file *file;
-		loff_t pos;
+		u8 *data;
+		u8 *enckey = (u8 *)key->payload.data[big_key_data];
+		size_t enclen = ALIGN(datalen, crypto_blkcipher_blocksize(big_key_blkcipher));
+
+		data = kmalloc(enclen, GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
 
 		file = dentry_open(path, O_RDONLY, current_cred());
-		if (IS_ERR(file))
-			return PTR_ERR(file);
+		if (IS_ERR(file)) {
+			ret = PTR_ERR(file);
+			goto error;
+		}
 
-		pos = 0;
-		ret = vfs_read(file, buffer, datalen, &pos);
-		fput(file);
-		if (ret >= 0 && ret != datalen)
+		/* read file to kernel and decrypt */
+		ret = kernel_read(file, 0, data, enclen);
+		if (ret >= 0 && ret != enclen) {
 			ret = -EIO;
+			goto err_fput;
+		}
+
+		ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey);
+		if (ret)
+			goto err_fput;
+
+		ret = datalen;
+
+		/* copy decrypted data to user */
+		if (copy_to_user(buffer, data, datalen) != 0)
+			ret = -EFAULT;
+
+err_fput:
+		fput(file);
+error:
+		kfree(data);
 	} else {
 		ret = datalen;
 		if (copy_to_user(buffer, key->payload.data[big_key_data],
@@ -209,8 +331,48 @@
 	return ret;
 }
 
+/*
+ * Register key type
+ */
 static int __init big_key_init(void)
 {
 	return register_key_type(&key_type_big_key);
 }
+
+/*
+ * Initialize big_key crypto and RNG algorithms
+ */
+static int __init big_key_crypto_init(void)
+{
+	int ret = -EINVAL;
+
+	/* init RNG */
+	big_key_rng = crypto_alloc_rng(big_key_rng_name, 0, 0);
+	if (IS_ERR(big_key_rng)) {
+		big_key_rng = NULL;
+		return -EFAULT;
+	}
+
+	/* seed RNG */
+	ret = crypto_rng_reset(big_key_rng, NULL, crypto_rng_seedsize(big_key_rng));
+	if (ret)
+		goto error;
+
+	/* init block cipher */
+	big_key_blkcipher = crypto_alloc_blkcipher(big_key_alg_name, 0, 0);
+	if (IS_ERR(big_key_blkcipher)) {
+		big_key_blkcipher = NULL;
+		ret = -EFAULT;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	crypto_free_rng(big_key_rng);
+	big_key_rng = NULL;
+	return ret;
+}
+
 device_initcall(big_key_init);
+late_initcall(big_key_crypto_init);
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 25430a3..c8783b3 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -132,6 +132,10 @@
 	case KEYCTL_GET_PERSISTENT:
 		return keyctl_get_persistent(arg2, arg3);
 
+	case KEYCTL_DH_COMPUTE:
+		return keyctl_dh_compute(compat_ptr(arg2), compat_ptr(arg3),
+					 arg4);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/dh.c b/security/keys/dh.c
new file mode 100644
index 0000000..880505a
--- /dev/null
+++ b/security/keys/dh.c
@@ -0,0 +1,160 @@
+/* Crypto operations using stored keys
+ *
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/mpi.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <keys/user-type.h>
+#include "internal.h"
+
+/*
+ * Public key or shared secret generation function [RFC2631 sec 2.1.1]
+ *
+ * ya = g^xa mod p;
+ * or
+ * ZZ = yb^xa mod p;
+ *
+ * where xa is the local private key, ya is the local public key, g is
+ * the generator, p is the prime, yb is the remote public key, and ZZ
+ * is the shared secret.
+ *
+ * Both are the same calculation, so g or yb are the "base" and ya or
+ * ZZ are the "result".
+ */
+static int do_dh(MPI result, MPI base, MPI xa, MPI p)
+{
+	return mpi_powm(result, base, xa, p);
+}
+
+static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi)
+{
+	struct key *key;
+	key_ref_t key_ref;
+	long status;
+	ssize_t ret;
+
+	key_ref = lookup_user_key(keyid, 0, KEY_NEED_READ);
+	if (IS_ERR(key_ref)) {
+		ret = -ENOKEY;
+		goto error;
+	}
+
+	key = key_ref_to_ptr(key_ref);
+
+	ret = -EOPNOTSUPP;
+	if (key->type == &key_type_user) {
+		down_read(&key->sem);
+		status = key_validate(key);
+		if (status == 0) {
+			const struct user_key_payload *payload;
+
+			payload = user_key_payload(key);
+
+			if (maxlen == 0) {
+				*mpi = NULL;
+				ret = payload->datalen;
+			} else if (payload->datalen <= maxlen) {
+				*mpi = mpi_read_raw_data(payload->data,
+							 payload->datalen);
+				if (*mpi)
+					ret = payload->datalen;
+			} else {
+				ret = -EINVAL;
+			}
+		}
+		up_read(&key->sem);
+	}
+
+	key_put(key);
+error:
+	return ret;
+}
+
+long keyctl_dh_compute(struct keyctl_dh_params __user *params,
+		       char __user *buffer, size_t buflen)
+{
+	long ret;
+	MPI base, private, prime, result;
+	unsigned nbytes;
+	struct keyctl_dh_params pcopy;
+	uint8_t *kbuf;
+	ssize_t keylen;
+	size_t resultlen;
+
+	if (!params || (!buffer && buflen)) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (copy_from_user(&pcopy, params, sizeof(pcopy)) != 0) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	keylen = mpi_from_key(pcopy.prime, buflen, &prime);
+	if (keylen < 0 || !prime) {
+		/* buflen == 0 may be used to query the required buffer size,
+		 * which is the prime key length.
+		 */
+		ret = keylen;
+		goto out;
+	}
+
+	/* The result is never longer than the prime */
+	resultlen = keylen;
+
+	keylen = mpi_from_key(pcopy.base, SIZE_MAX, &base);
+	if (keylen < 0 || !base) {
+		ret = keylen;
+		goto error1;
+	}
+
+	keylen = mpi_from_key(pcopy.private, SIZE_MAX, &private);
+	if (keylen < 0 || !private) {
+		ret = keylen;
+		goto error2;
+	}
+
+	result = mpi_alloc(0);
+	if (!result) {
+		ret = -ENOMEM;
+		goto error3;
+	}
+
+	kbuf = kmalloc(resultlen, GFP_KERNEL);
+	if (!kbuf) {
+		ret = -ENOMEM;
+		goto error4;
+	}
+
+	ret = do_dh(result, base, private, prime);
+	if (ret)
+		goto error5;
+
+	ret = mpi_read_buffer(result, kbuf, resultlen, &nbytes, NULL);
+	if (ret != 0)
+		goto error5;
+
+	ret = nbytes;
+	if (copy_to_user(buffer, kbuf, nbytes) != 0)
+		ret = -EFAULT;
+
+error5:
+	kfree(kbuf);
+error4:
+	mpi_free(result);
+error3:
+	mpi_free(private);
+error2:
+	mpi_free(base);
+error1:
+	mpi_free(prime);
+out:
+	return ret;
+}
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 5105c2c..8ec7a52 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/key-type.h>
 #include <linux/task_work.h>
+#include <linux/keyctl.h>
 
 struct iovec;
 
@@ -257,6 +258,17 @@
 }
 #endif
 
+#ifdef CONFIG_KEY_DH_OPERATIONS
+extern long keyctl_dh_compute(struct keyctl_dh_params __user *, char __user *,
+			      size_t);
+#else
+static inline long keyctl_dh_compute(struct keyctl_dh_params __user *params,
+				     char __user *buffer, size_t buflen)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 /*
  * Debugging key validation
  */
diff --git a/security/keys/key.c b/security/keys/key.c
index b287551..bd5a272 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -201,6 +201,7 @@
  * @cred: The credentials specifying UID namespace.
  * @perm: The permissions mask of the new key.
  * @flags: Flags specifying quota properties.
+ * @restrict_link: Optional link restriction method for new keyrings.
  *
  * Allocate a key of the specified type with the attributes given.  The key is
  * returned in an uninstantiated state and the caller needs to instantiate the
@@ -223,7 +224,10 @@
  */
 struct key *key_alloc(struct key_type *type, const char *desc,
 		      kuid_t uid, kgid_t gid, const struct cred *cred,
-		      key_perm_t perm, unsigned long flags)
+		      key_perm_t perm, unsigned long flags,
+		      int (*restrict_link)(struct key *,
+					   const struct key_type *,
+					   const union key_payload *))
 {
 	struct key_user *user = NULL;
 	struct key *key;
@@ -291,11 +295,10 @@
 	key->uid = uid;
 	key->gid = gid;
 	key->perm = perm;
+	key->restrict_link = restrict_link;
 
 	if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
 		key->flags |= 1 << KEY_FLAG_IN_QUOTA;
-	if (flags & KEY_ALLOC_TRUSTED)
-		key->flags |= 1 << KEY_FLAG_TRUSTED;
 	if (flags & KEY_ALLOC_BUILT_IN)
 		key->flags |= 1 << KEY_FLAG_BUILTIN;
 
@@ -496,6 +499,12 @@
 	}
 
 	if (keyring) {
+		if (keyring->restrict_link) {
+			ret = keyring->restrict_link(keyring, key->type,
+						     &prep.payload);
+			if (ret < 0)
+				goto error;
+		}
 		ret = __key_link_begin(keyring, &key->index_key, &edit);
 		if (ret < 0)
 			goto error;
@@ -551,8 +560,12 @@
 	awaken = 0;
 	ret = -EBUSY;
 
-	if (keyring)
+	if (keyring) {
+		if (keyring->restrict_link)
+			return -EPERM;
+
 		link_ret = __key_link_begin(keyring, &key->index_key, &edit);
+	}
 
 	mutex_lock(&key_construction_mutex);
 
@@ -793,6 +806,9 @@
 	struct key *keyring, *key = NULL;
 	key_ref_t key_ref;
 	int ret;
+	int (*restrict_link)(struct key *,
+			     const struct key_type *,
+			     const union key_payload *) = NULL;
 
 	/* look up the key type to see if it's one of the registered kernel
 	 * types */
@@ -811,6 +827,10 @@
 
 	key_check(keyring);
 
+	key_ref = ERR_PTR(-EPERM);
+	if (!(flags & KEY_ALLOC_BYPASS_RESTRICTION))
+		restrict_link = keyring->restrict_link;
+
 	key_ref = ERR_PTR(-ENOTDIR);
 	if (keyring->type != &key_type_keyring)
 		goto error_put_type;
@@ -819,7 +839,6 @@
 	prep.data = payload;
 	prep.datalen = plen;
 	prep.quotalen = index_key.type->def_datalen;
-	prep.trusted = flags & KEY_ALLOC_TRUSTED;
 	prep.expiry = TIME_T_MAX;
 	if (index_key.type->preparse) {
 		ret = index_key.type->preparse(&prep);
@@ -835,10 +854,13 @@
 	}
 	index_key.desc_len = strlen(index_key.description);
 
-	key_ref = ERR_PTR(-EPERM);
-	if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags))
-		goto error_free_prep;
-	flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0;
+	if (restrict_link) {
+		ret = restrict_link(keyring, index_key.type, &prep.payload);
+		if (ret < 0) {
+			key_ref = ERR_PTR(ret);
+			goto error_free_prep;
+		}
+	}
 
 	ret = __key_link_begin(keyring, &index_key, &edit);
 	if (ret < 0) {
@@ -879,7 +901,7 @@
 
 	/* allocate a new key */
 	key = key_alloc(index_key.type, index_key.description,
-			cred->fsuid, cred->fsgid, cred, perm, flags);
+			cred->fsuid, cred->fsgid, cred, perm, flags, NULL);
 	if (IS_ERR(key)) {
 		key_ref = ERR_CAST(key);
 		goto error_link_end;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index ed73c6c..3b135a0 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1686,6 +1686,11 @@
 	case KEYCTL_GET_PERSISTENT:
 		return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
 
+	case KEYCTL_DH_COMPUTE:
+		return keyctl_dh_compute((struct keyctl_dh_params __user *) arg2,
+					 (char __user *) arg3,
+					 (size_t) arg4);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index f931ccf..c91e4e0 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -491,13 +491,17 @@
  */
 struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
 			  const struct cred *cred, key_perm_t perm,
-			  unsigned long flags, struct key *dest)
+			  unsigned long flags,
+			  int (*restrict_link)(struct key *,
+					       const struct key_type *,
+					       const union key_payload *),
+			  struct key *dest)
 {
 	struct key *keyring;
 	int ret;
 
 	keyring = key_alloc(&key_type_keyring, description,
-			    uid, gid, cred, perm, flags);
+			    uid, gid, cred, perm, flags, restrict_link);
 	if (!IS_ERR(keyring)) {
 		ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
 		if (ret < 0) {
@@ -510,6 +514,26 @@
 }
 EXPORT_SYMBOL(keyring_alloc);
 
+/**
+ * restrict_link_reject - Give -EPERM to restrict link
+ * @keyring: The keyring being added to.
+ * @type: The type of key being added.
+ * @payload: The payload of the key intended to be added.
+ *
+ * Reject the addition of any links to a keyring.  It can be overridden by
+ * passing KEY_ALLOC_BYPASS_RESTRICTION to key_create_or_update() when
+ * adding a key to a keyring.
+ *
+ * This is meant to be passed as the restrict_link parameter to
+ * keyring_alloc().
+ */
+int restrict_link_reject(struct key *keyring,
+			 const struct key_type *type,
+			 const union key_payload *payload)
+{
+	return -EPERM;
+}
+
 /*
  * By default, we keys found by getting an exact match on their descriptions.
  */
@@ -1191,6 +1215,16 @@
 	up_write(&keyring->sem);
 }
 
+/*
+ * Check addition of keys to restricted keyrings.
+ */
+static int __key_link_check_restriction(struct key *keyring, struct key *key)
+{
+	if (!keyring->restrict_link)
+		return 0;
+	return keyring->restrict_link(keyring, key->type, &key->payload);
+}
+
 /**
  * key_link - Link a key to a keyring
  * @keyring: The keyring to make the link in.
@@ -1221,14 +1255,12 @@
 	key_check(keyring);
 	key_check(key);
 
-	if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) &&
-	    !test_bit(KEY_FLAG_TRUSTED, &key->flags))
-		return -EPERM;
-
 	ret = __key_link_begin(keyring, &key->index_key, &edit);
 	if (ret == 0) {
 		kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
-		ret = __key_link_check_live_key(keyring, key);
+		ret = __key_link_check_restriction(keyring, key);
+		if (ret == 0)
+			ret = __key_link_check_live_key(keyring, key);
 		if (ret == 0)
 			__key_link(key, &edit);
 		__key_link_end(keyring, &key->index_key, edit);
diff --git a/security/keys/persistent.c b/security/keys/persistent.c
index c9fae5e..2ef45b3 100644
--- a/security/keys/persistent.c
+++ b/security/keys/persistent.c
@@ -26,7 +26,7 @@
 					current_cred(),
 					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
 					 KEY_USR_VIEW | KEY_USR_READ),
-					KEY_ALLOC_NOT_IN_QUOTA, NULL);
+					KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
 	if (IS_ERR(reg))
 		return PTR_ERR(reg);
 
@@ -60,7 +60,7 @@
 				   uid, INVALID_GID, current_cred(),
 				   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
 				    KEY_USR_VIEW | KEY_USR_READ),
-				   KEY_ALLOC_NOT_IN_QUOTA,
+				   KEY_ALLOC_NOT_IN_QUOTA, NULL,
 				   ns->persistent_keyring_register);
 	if (IS_ERR(persistent))
 		return ERR_CAST(persistent);
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index e6d50172..40a8852 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -76,7 +76,8 @@
 		if (IS_ERR(uid_keyring)) {
 			uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID,
 						    cred, user_keyring_perm,
-						    KEY_ALLOC_IN_QUOTA, NULL);
+						    KEY_ALLOC_IN_QUOTA,
+						    NULL, NULL);
 			if (IS_ERR(uid_keyring)) {
 				ret = PTR_ERR(uid_keyring);
 				goto error;
@@ -92,7 +93,8 @@
 			session_keyring =
 				keyring_alloc(buf, user->uid, INVALID_GID,
 					      cred, user_keyring_perm,
-					      KEY_ALLOC_IN_QUOTA, NULL);
+					      KEY_ALLOC_IN_QUOTA,
+					      NULL, NULL);
 			if (IS_ERR(session_keyring)) {
 				ret = PTR_ERR(session_keyring);
 				goto error_release;
@@ -134,7 +136,8 @@
 
 	keyring = keyring_alloc("_tid", new->uid, new->gid, new,
 				KEY_POS_ALL | KEY_USR_VIEW,
-				KEY_ALLOC_QUOTA_OVERRUN, NULL);
+				KEY_ALLOC_QUOTA_OVERRUN,
+				NULL, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
 
@@ -180,7 +183,8 @@
 
 	keyring = keyring_alloc("_pid", new->uid, new->gid, new,
 				KEY_POS_ALL | KEY_USR_VIEW,
-				KEY_ALLOC_QUOTA_OVERRUN, NULL);
+				KEY_ALLOC_QUOTA_OVERRUN,
+				NULL, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
 
@@ -231,7 +235,7 @@
 
 		keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred,
 					KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
-					flags, NULL);
+					flags, NULL, NULL);
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
 	} else {
@@ -785,7 +789,7 @@
 		keyring = keyring_alloc(
 			name, old->uid, old->gid, old,
 			KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK,
-			KEY_ALLOC_IN_QUOTA, NULL);
+			KEY_ALLOC_IN_QUOTA, NULL, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
 			goto error2;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index c7a117c..a29e355 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -116,7 +116,7 @@
 	cred = get_current_cred();
 	keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred,
 				KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
-				KEY_ALLOC_QUOTA_OVERRUN, NULL);
+				KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL);
 	put_cred(cred);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
@@ -355,7 +355,7 @@
 
 	key = key_alloc(ctx->index_key.type, ctx->index_key.description,
 			ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
-			perm, flags);
+			perm, flags, NULL);
 	if (IS_ERR(key))
 		goto alloc_failed;
 
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 4f0f112..9db8b4a 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -202,7 +202,7 @@
 	authkey = key_alloc(&key_type_request_key_auth, desc,
 			    cred->fsuid, cred->fsgid, cred,
 			    KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
-			    KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA);
+			    KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(authkey)) {
 		ret = PTR_ERR(authkey);
 		goto error_alloc;
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 8705d79..66b1840 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -96,45 +96,25 @@
  */
 int user_update(struct key *key, struct key_preparsed_payload *prep)
 {
-	struct user_key_payload *upayload, *zap;
-	size_t datalen = prep->datalen;
+	struct user_key_payload *zap = NULL;
 	int ret;
 
-	ret = -EINVAL;
-	if (datalen <= 0 || datalen > 32767 || !prep->data)
-		goto error;
-
-	/* construct a replacement payload */
-	ret = -ENOMEM;
-	upayload = kmalloc(sizeof(*upayload) + datalen, GFP_KERNEL);
-	if (!upayload)
-		goto error;
-
-	upayload->datalen = datalen;
-	memcpy(upayload->data, prep->data, datalen);
-
 	/* check the quota and attach the new data */
-	zap = upayload;
+	ret = key_payload_reserve(key, prep->datalen);
+	if (ret < 0)
+		return ret;
 
-	ret = key_payload_reserve(key, datalen);
-
-	if (ret == 0) {
-		/* attach the new data, displacing the old */
-		if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags))
-			zap = key->payload.data[0];
-		else
-			zap = NULL;
-		rcu_assign_keypointer(key, upayload);
-		key->expiry = 0;
-	}
+	/* attach the new data, displacing the old */
+	key->expiry = prep->expiry;
+	if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+		zap = rcu_dereference_key(key);
+	rcu_assign_keypointer(key, prep->payload.data[0]);
+	prep->payload.data[0] = NULL;
 
 	if (zap)
 		kfree_rcu(zap, rcu);
-
-error:
 	return ret;
 }
-
 EXPORT_SYMBOL_GPL(user_update);
 
 /*
diff --git a/security/loadpin/Kconfig b/security/loadpin/Kconfig
new file mode 100644
index 0000000..dd01aa9
--- /dev/null
+++ b/security/loadpin/Kconfig
@@ -0,0 +1,19 @@
+config SECURITY_LOADPIN
+	bool "Pin load of kernel files (modules, fw, etc) to one filesystem"
+	depends on SECURITY && BLOCK
+	help
+	  Any files read through the kernel file reading interface
+	  (kernel modules, firmware, kexec images, security policy)
+	  can be pinned to the first filesystem used for loading. When
+	  enabled, any files that come from other filesystems will be
+	  rejected. This is best used on systems without an initrd that
+	  have a root filesystem backed by a read-only device such as
+	  dm-verity or a CDROM.
+
+config SECURITY_LOADPIN_ENABLED
+	bool "Enforce LoadPin at boot"
+	depends on SECURITY_LOADPIN
+	help
+	  If selected, LoadPin will enforce pinning at boot. If not
+	  selected, it can be enabled at boot with the kernel parameter
+	  "loadpin.enabled=1".
diff --git a/security/loadpin/Makefile b/security/loadpin/Makefile
new file mode 100644
index 0000000..c2d77f8
--- /dev/null
+++ b/security/loadpin/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SECURITY_LOADPIN) += loadpin.o
diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
new file mode 100644
index 0000000..89a46f1
--- /dev/null
+++ b/security/loadpin/loadpin.c
@@ -0,0 +1,190 @@
+/*
+ * Module and Firmware Pinning Security Module
+ *
+ * Copyright 2011-2016 Google Inc.
+ *
+ * Author: Kees Cook <keescook@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "LoadPin: " fmt
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/fs_struct.h>
+#include <linux/lsm_hooks.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/sched.h>	/* current */
+#include <linux/string_helpers.h>
+
+static void report_load(const char *origin, struct file *file, char *operation)
+{
+	char *cmdline, *pathname;
+
+	pathname = kstrdup_quotable_file(file, GFP_KERNEL);
+	cmdline = kstrdup_quotable_cmdline(current, GFP_KERNEL);
+
+	pr_notice("%s %s obj=%s%s%s pid=%d cmdline=%s%s%s\n",
+		  origin, operation,
+		  (pathname && pathname[0] != '<') ? "\"" : "",
+		  pathname,
+		  (pathname && pathname[0] != '<') ? "\"" : "",
+		  task_pid_nr(current),
+		  cmdline ? "\"" : "", cmdline, cmdline ? "\"" : "");
+
+	kfree(cmdline);
+	kfree(pathname);
+}
+
+static int enabled = IS_ENABLED(CONFIG_SECURITY_LOADPIN_ENABLED);
+static struct super_block *pinned_root;
+static DEFINE_SPINLOCK(pinned_root_spinlock);
+
+#ifdef CONFIG_SYSCTL
+static int zero;
+static int one = 1;
+
+static struct ctl_path loadpin_sysctl_path[] = {
+	{ .procname = "kernel", },
+	{ .procname = "loadpin", },
+	{ }
+};
+
+static struct ctl_table loadpin_sysctl_table[] = {
+	{
+		.procname       = "enabled",
+		.data           = &enabled,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_minmax,
+		.extra1         = &zero,
+		.extra2         = &one,
+	},
+	{ }
+};
+
+/*
+ * This must be called after early kernel init, since then the rootdev
+ * is available.
+ */
+static void check_pinning_enforcement(struct super_block *mnt_sb)
+{
+	bool ro = false;
+
+	/*
+	 * If load pinning is not enforced via a read-only block
+	 * device, allow sysctl to change modes for testing.
+	 */
+	if (mnt_sb->s_bdev) {
+		ro = bdev_read_only(mnt_sb->s_bdev);
+		pr_info("dev(%u,%u): %s\n",
+			MAJOR(mnt_sb->s_bdev->bd_dev),
+			MINOR(mnt_sb->s_bdev->bd_dev),
+			ro ? "read-only" : "writable");
+	} else
+		pr_info("mnt_sb lacks block device, treating as: writable\n");
+
+	if (!ro) {
+		if (!register_sysctl_paths(loadpin_sysctl_path,
+					   loadpin_sysctl_table))
+			pr_notice("sysctl registration failed!\n");
+		else
+			pr_info("load pinning can be disabled.\n");
+	} else
+		pr_info("load pinning engaged.\n");
+}
+#else
+static void check_pinning_enforcement(struct super_block *mnt_sb)
+{
+	pr_info("load pinning engaged.\n");
+}
+#endif
+
+static void loadpin_sb_free_security(struct super_block *mnt_sb)
+{
+	/*
+	 * When unmounting the filesystem we were using for load
+	 * pinning, we acknowledge the superblock release, but make sure
+	 * no other modules or firmware can be loaded.
+	 */
+	if (!IS_ERR_OR_NULL(pinned_root) && mnt_sb == pinned_root) {
+		pinned_root = ERR_PTR(-EIO);
+		pr_info("umount pinned fs: refusing further loads\n");
+	}
+}
+
+static int loadpin_read_file(struct file *file, enum kernel_read_file_id id)
+{
+	struct super_block *load_root;
+	const char *origin = kernel_read_file_id_str(id);
+
+	/* This handles the older init_module API that has a NULL file. */
+	if (!file) {
+		if (!enabled) {
+			report_load(origin, NULL, "old-api-pinning-ignored");
+			return 0;
+		}
+
+		report_load(origin, NULL, "old-api-denied");
+		return -EPERM;
+	}
+
+	load_root = file->f_path.mnt->mnt_sb;
+
+	/* First loaded module/firmware defines the root for all others. */
+	spin_lock(&pinned_root_spinlock);
+	/*
+	 * pinned_root is only NULL at startup. Otherwise, it is either
+	 * a valid reference, or an ERR_PTR.
+	 */
+	if (!pinned_root) {
+		pinned_root = load_root;
+		/*
+		 * Unlock now since it's only pinned_root we care about.
+		 * In the worst case, we will (correctly) report pinning
+		 * failures before we have announced that pinning is
+		 * enabled. This would be purely cosmetic.
+		 */
+		spin_unlock(&pinned_root_spinlock);
+		check_pinning_enforcement(pinned_root);
+		report_load(origin, file, "pinned");
+	} else {
+		spin_unlock(&pinned_root_spinlock);
+	}
+
+	if (IS_ERR_OR_NULL(pinned_root) || load_root != pinned_root) {
+		if (unlikely(!enabled)) {
+			report_load(origin, file, "pinning-ignored");
+			return 0;
+		}
+
+		report_load(origin, file, "denied");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+static struct security_hook_list loadpin_hooks[] = {
+	LSM_HOOK_INIT(sb_free_security, loadpin_sb_free_security),
+	LSM_HOOK_INIT(kernel_read_file, loadpin_read_file),
+};
+
+void __init loadpin_add_hooks(void)
+{
+	pr_info("ready to pin (currently %sabled)\n", enabled ? "en" : "dis");
+	security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks));
+}
+
+/* Should not be mutable after boot, so not listed in sysfs (perm == 0). */
+module_param(enabled, int, 0);
+MODULE_PARM_DESC(enabled, "Pin module/firmware loading (default: true)");
diff --git a/security/security.c b/security/security.c
index d17e4a6..7095693 100644
--- a/security/security.c
+++ b/security/security.c
@@ -60,6 +60,7 @@
 	 */
 	capability_add_hooks();
 	yama_add_hooks();
+	loadpin_add_hooks();
 
 	/*
 	 * Load all the remaining security modules.
@@ -1848,7 +1849,6 @@
 	.tun_dev_attach =
 		LIST_HEAD_INIT(security_hook_heads.tun_dev_attach),
 	.tun_dev_open =	LIST_HEAD_INIT(security_hook_heads.tun_dev_open),
-	.skb_owned_by =	LIST_HEAD_INIT(security_hook_heads.skb_owned_by),
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3140efa..a86d537 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -259,7 +259,7 @@
 
 	might_sleep_if(may_sleep);
 
-	if (isec->initialized == LABEL_INVALID) {
+	if (ss_initialized && isec->initialized != LABEL_INITIALIZED) {
 		if (!may_sleep)
 			return -ECHILD;
 
@@ -297,6 +297,13 @@
 	return inode->i_security;
 }
 
+static struct inode_security_struct *backing_inode_security_novalidate(struct dentry *dentry)
+{
+	struct inode *inode = d_backing_inode(dentry);
+
+	return inode->i_security;
+}
+
 /*
  * Get the security label of a dentry's backing inode.
  */
@@ -687,7 +694,7 @@
 	struct superblock_security_struct *sbsec = sb->s_security;
 	const char *name = sb->s_type->name;
 	struct dentry *root = sbsec->sb->s_root;
-	struct inode_security_struct *root_isec = backing_inode_security(root);
+	struct inode_security_struct *root_isec;
 	u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
 	u32 defcontext_sid = 0;
 	char **mount_options = opts->mnt_opts;
@@ -730,6 +737,8 @@
 	    && (num_opts == 0))
 		goto out;
 
+	root_isec = backing_inode_security_novalidate(root);
+
 	/*
 	 * parse the mount options, check if they are valid sids.
 	 * also check if someone is trying to mount the same sb more
@@ -1623,7 +1632,7 @@
 
 /* Check whether a task is allowed to use a capability. */
 static int cred_has_capability(const struct cred *cred,
-			       int cap, int audit)
+			       int cap, int audit, bool initns)
 {
 	struct common_audit_data ad;
 	struct av_decision avd;
@@ -1637,10 +1646,10 @@
 
 	switch (CAP_TO_INDEX(cap)) {
 	case 0:
-		sclass = SECCLASS_CAPABILITY;
+		sclass = initns ? SECCLASS_CAPABILITY : SECCLASS_CAP_USERNS;
 		break;
 	case 1:
-		sclass = SECCLASS_CAPABILITY2;
+		sclass = initns ? SECCLASS_CAPABILITY2 : SECCLASS_CAP2_USERNS;
 		break;
 	default:
 		printk(KERN_ERR
@@ -1782,7 +1791,6 @@
 					 u32 *_new_isid)
 {
 	const struct superblock_security_struct *sbsec = dir->i_sb->s_security;
-	const struct inode_security_struct *dsec = inode_security(dir);
 	const struct task_security_struct *tsec = current_security();
 
 	if ((sbsec->flags & SE_SBINITIALIZED) &&
@@ -1792,6 +1800,7 @@
 		   tsec->create_sid) {
 		*_new_isid = tsec->create_sid;
 	} else {
+		const struct inode_security_struct *dsec = inode_security(dir);
 		return security_transition_sid(tsec->sid, dsec->sid, tclass,
 					       name, _new_isid);
 	}
@@ -2076,7 +2085,7 @@
 	u32 sid = task_sid(to);
 	struct file_security_struct *fsec = file->f_security;
 	struct dentry *dentry = file->f_path.dentry;
-	struct inode_security_struct *isec = backing_inode_security(dentry);
+	struct inode_security_struct *isec;
 	struct common_audit_data ad;
 	int rc;
 
@@ -2095,6 +2104,7 @@
 	if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
 		return 0;
 
+	isec = backing_inode_security(dentry);
 	return avc_has_perm(sid, isec->sid, isec->sclass, file_to_av(file),
 			    &ad);
 }
@@ -2143,7 +2153,7 @@
 static int selinux_capable(const struct cred *cred, struct user_namespace *ns,
 			   int cap, int audit)
 {
-	return cred_has_capability(cred, cap, audit);
+	return cred_has_capability(cred, cap, audit, ns == &init_user_ns);
 }
 
 static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
@@ -2221,7 +2231,7 @@
 	int rc, cap_sys_admin = 0;
 
 	rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN,
-					SECURITY_CAP_NOAUDIT);
+				 SECURITY_CAP_NOAUDIT, true);
 	if (rc == 0)
 		cap_sys_admin = 1;
 
@@ -2230,6 +2240,20 @@
 
 /* binprm security operations */
 
+static u32 ptrace_parent_sid(struct task_struct *task)
+{
+	u32 sid = 0;
+	struct task_struct *tracer;
+
+	rcu_read_lock();
+	tracer = ptrace_parent(task);
+	if (tracer)
+		sid = task_sid(tracer);
+	rcu_read_unlock();
+
+	return sid;
+}
+
 static int check_nnp_nosuid(const struct linux_binprm *bprm,
 			    const struct task_security_struct *old_tsec,
 			    const struct task_security_struct *new_tsec)
@@ -2351,18 +2375,7 @@
 		 * changes its SID has the appropriate permit */
 		if (bprm->unsafe &
 		    (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
-			struct task_struct *tracer;
-			struct task_security_struct *sec;
-			u32 ptsid = 0;
-
-			rcu_read_lock();
-			tracer = ptrace_parent(current);
-			if (likely(tracer != NULL)) {
-				sec = __task_cred(tracer)->security;
-				ptsid = sec->sid;
-			}
-			rcu_read_unlock();
-
+			u32 ptsid = ptrace_parent_sid(current);
 			if (ptsid != 0) {
 				rc = avc_has_perm(ptsid, new_tsec->sid,
 						  SECCLASS_PROCESS,
@@ -3046,7 +3059,7 @@
 				  const void *value, size_t size, int flags)
 {
 	struct inode *inode = d_backing_inode(dentry);
-	struct inode_security_struct *isec = backing_inode_security(dentry);
+	struct inode_security_struct *isec;
 	struct superblock_security_struct *sbsec;
 	struct common_audit_data ad;
 	u32 newsid, sid = current_sid();
@@ -3065,6 +3078,7 @@
 	ad.type = LSM_AUDIT_DATA_DENTRY;
 	ad.u.dentry = dentry;
 
+	isec = backing_inode_security(dentry);
 	rc = avc_has_perm(sid, isec->sid, isec->sclass,
 			  FILE__RELABELFROM, &ad);
 	if (rc)
@@ -3123,7 +3137,7 @@
 					int flags)
 {
 	struct inode *inode = d_backing_inode(dentry);
-	struct inode_security_struct *isec = backing_inode_security(dentry);
+	struct inode_security_struct *isec;
 	u32 newsid;
 	int rc;
 
@@ -3140,6 +3154,7 @@
 		return;
 	}
 
+	isec = backing_inode_security(dentry);
 	isec->sclass = inode_mode_to_security_class(inode->i_mode);
 	isec->sid = newsid;
 	isec->initialized = LABEL_INITIALIZED;
@@ -3181,7 +3196,7 @@
 	u32 size;
 	int error;
 	char *context = NULL;
-	struct inode_security_struct *isec = inode_security(inode);
+	struct inode_security_struct *isec;
 
 	if (strcmp(name, XATTR_SELINUX_SUFFIX))
 		return -EOPNOTSUPP;
@@ -3199,7 +3214,8 @@
 			    SECURITY_CAP_NOAUDIT);
 	if (!error)
 		error = cred_has_capability(current_cred(), CAP_MAC_ADMIN,
-					    SECURITY_CAP_NOAUDIT);
+					    SECURITY_CAP_NOAUDIT, true);
+	isec = inode_security(inode);
 	if (!error)
 		error = security_sid_to_context_force(isec->sid, &context,
 						      &size);
@@ -3220,7 +3236,7 @@
 static int selinux_inode_setsecurity(struct inode *inode, const char *name,
 				     const void *value, size_t size, int flags)
 {
-	struct inode_security_struct *isec = inode_security(inode);
+	struct inode_security_struct *isec = inode_security_novalidate(inode);
 	u32 newsid;
 	int rc;
 
@@ -3309,7 +3325,7 @@
 	struct common_audit_data ad;
 	struct file_security_struct *fsec = file->f_security;
 	struct inode *inode = file_inode(file);
-	struct inode_security_struct *isec = inode_security(inode);
+	struct inode_security_struct *isec;
 	struct lsm_ioctlop_audit ioctl;
 	u32 ssid = cred_sid(cred);
 	int rc;
@@ -3333,6 +3349,7 @@
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
 
+	isec = inode_security(inode);
 	rc = avc_has_extended_perms(ssid, isec->sid, isec->sclass,
 			requested, driver, xperm, &ad);
 out:
@@ -3374,7 +3391,7 @@
 	case KDSKBENT:
 	case KDSKBSENT:
 		error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG,
-					    SECURITY_CAP_AUDIT);
+					    SECURITY_CAP_AUDIT, true);
 		break;
 
 	/* default case assumes that the command will go
@@ -3463,8 +3480,9 @@
 		    vma->vm_end <= vma->vm_mm->brk) {
 			rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP);
 		} else if (!vma->vm_file &&
-			   vma->vm_start <= vma->vm_mm->start_stack &&
-			   vma->vm_end >= vma->vm_mm->start_stack) {
+			   ((vma->vm_start <= vma->vm_mm->start_stack &&
+			     vma->vm_end >= vma->vm_mm->start_stack) ||
+			    vma_is_stack_for_task(vma, current))) {
 			rc = current_has_perm(current, PROCESS__EXECSTACK);
 		} else if (vma->vm_file && vma->anon_vma) {
 			/*
@@ -3720,6 +3738,52 @@
 			    SYSTEM__MODULE_REQUEST, &ad);
 }
 
+static int selinux_kernel_module_from_file(struct file *file)
+{
+	struct common_audit_data ad;
+	struct inode_security_struct *isec;
+	struct file_security_struct *fsec;
+	u32 sid = current_sid();
+	int rc;
+
+	/* init_module */
+	if (file == NULL)
+		return avc_has_perm(sid, sid, SECCLASS_SYSTEM,
+					SYSTEM__MODULE_LOAD, NULL);
+
+	/* finit_module */
+
+	ad.type = LSM_AUDIT_DATA_PATH;
+	ad.u.path = file->f_path;
+
+	fsec = file->f_security;
+	if (sid != fsec->sid) {
+		rc = avc_has_perm(sid, fsec->sid, SECCLASS_FD, FD__USE, &ad);
+		if (rc)
+			return rc;
+	}
+
+	isec = inode_security(file_inode(file));
+	return avc_has_perm(sid, isec->sid, SECCLASS_SYSTEM,
+				SYSTEM__MODULE_LOAD, &ad);
+}
+
+static int selinux_kernel_read_file(struct file *file,
+				    enum kernel_read_file_id id)
+{
+	int rc = 0;
+
+	switch (id) {
+	case READING_MODULE:
+		rc = selinux_kernel_module_from_file(file);
+		break;
+	default:
+		break;
+	}
+
+	return rc;
+}
+
 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return current_has_perm(p, PROCESS__SETPGID);
@@ -4599,6 +4663,7 @@
 {
 	u32 peer_secid = SECSID_NULL;
 	u16 family;
+	struct inode_security_struct *isec;
 
 	if (skb && skb->protocol == htons(ETH_P_IP))
 		family = PF_INET;
@@ -4609,9 +4674,10 @@
 	else
 		goto out;
 
-	if (sock && family == PF_UNIX)
-		selinux_inode_getsecid(SOCK_INODE(sock), &peer_secid);
-	else if (skb)
+	if (sock && family == PF_UNIX) {
+		isec = inode_security_novalidate(SOCK_INODE(sock));
+		peer_secid = isec->sid;
+	} else if (skb)
 		selinux_skb_peerlbl_sid(skb, family, &peer_secid);
 
 out:
@@ -5676,7 +5742,6 @@
 			       char *name, void *value, size_t size)
 {
 	struct task_security_struct *tsec;
-	struct task_struct *tracer;
 	struct cred *new;
 	u32 sid = 0, ptsid;
 	int error;
@@ -5783,14 +5848,8 @@
 
 		/* Check for ptracing, and update the task SID if ok.
 		   Otherwise, leave SID unchanged and fail. */
-		ptsid = 0;
-		rcu_read_lock();
-		tracer = ptrace_parent(p);
-		if (tracer)
-			ptsid = task_sid(tracer);
-		rcu_read_unlock();
-
-		if (tracer) {
+		ptsid = ptrace_parent_sid(p);
+		if (ptsid != 0) {
 			error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
 					     PROCESS__PTRACE, NULL);
 			if (error)
@@ -6021,6 +6080,7 @@
 	LSM_HOOK_INIT(kernel_act_as, selinux_kernel_act_as),
 	LSM_HOOK_INIT(kernel_create_files_as, selinux_kernel_create_files_as),
 	LSM_HOOK_INIT(kernel_module_request, selinux_kernel_module_request),
+	LSM_HOOK_INIT(kernel_read_file, selinux_kernel_read_file),
 	LSM_HOOK_INIT(task_setpgid, selinux_task_setpgid),
 	LSM_HOOK_INIT(task_getpgid, selinux_task_getpgid),
 	LSM_HOOK_INIT(task_getsid, selinux_task_getsid),
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index ef83c4b..1f1f4b2 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -12,6 +12,18 @@
 #define COMMON_IPC_PERMS "create", "destroy", "getattr", "setattr", "read", \
 	    "write", "associate", "unix_read", "unix_write"
 
+#define COMMON_CAP_PERMS  "chown", "dac_override", "dac_read_search", \
+	    "fowner", "fsetid", "kill", "setgid", "setuid", "setpcap", \
+	    "linux_immutable", "net_bind_service", "net_broadcast", \
+	    "net_admin", "net_raw", "ipc_lock", "ipc_owner", "sys_module", \
+	    "sys_rawio", "sys_chroot", "sys_ptrace", "sys_pacct", "sys_admin", \
+	    "sys_boot", "sys_nice", "sys_resource", "sys_time", \
+	    "sys_tty_config", "mknod", "lease", "audit_write", \
+	    "audit_control", "setfcap"
+
+#define COMMON_CAP2_PERMS  "mac_override", "mac_admin", "syslog", \
+		"wake_alarm", "block_suspend", "audit_read"
+
 /*
  * Note: The name for any socket class should be suffixed by "socket",
  *	 and doesn't contain more than one substr of "socket".
@@ -32,16 +44,9 @@
 	    "setsockcreate", NULL } },
 	{ "system",
 	  { "ipc_info", "syslog_read", "syslog_mod",
-	    "syslog_console", "module_request", NULL } },
+	    "syslog_console", "module_request", "module_load", NULL } },
 	{ "capability",
-	  { "chown", "dac_override", "dac_read_search",
-	    "fowner", "fsetid", "kill", "setgid", "setuid", "setpcap",
-	    "linux_immutable", "net_bind_service", "net_broadcast",
-	    "net_admin", "net_raw", "ipc_lock", "ipc_owner", "sys_module",
-	    "sys_rawio", "sys_chroot", "sys_ptrace", "sys_pacct", "sys_admin",
-	    "sys_boot", "sys_nice", "sys_resource", "sys_time",
-	    "sys_tty_config", "mknod", "lease", "audit_write",
-	    "audit_control", "setfcap", NULL } },
+	  { COMMON_CAP_PERMS, NULL } },
 	{ "filesystem",
 	  { "mount", "remount", "unmount", "getattr",
 	    "relabelfrom", "relabelto", "associate", "quotamod",
@@ -150,12 +155,15 @@
 	{ "memprotect", { "mmap_zero", NULL } },
 	{ "peer", { "recv", NULL } },
 	{ "capability2",
-	  { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend",
-	    "audit_read", NULL } },
+	  { COMMON_CAP2_PERMS, NULL } },
 	{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
 	{ "tun_socket",
 	  { COMMON_SOCK_PERMS, "attach_queue", NULL } },
 	{ "binder", { "impersonate", "call", "set_context_mgr", "transfer",
 		      NULL } },
+	{ "cap_userns",
+	  { COMMON_CAP_PERMS, NULL } },
+	{ "cap2_userns",
+	  { COMMON_CAP2_PERMS, NULL } },
 	{ NULL }
   };
diff --git a/security/selinux/include/conditional.h b/security/selinux/include/conditional.h
index 67ce7a8..ff4fddc 100644
--- a/security/selinux/include/conditional.h
+++ b/security/selinux/include/conditional.h
@@ -17,6 +17,6 @@
 
 int security_set_bools(int len, int *values);
 
-int security_get_bool_value(int bool);
+int security_get_bool_value(int index);
 
 #endif
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index a2ae054..c21e135 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -38,9 +38,8 @@
 };
 
 enum label_initialized {
-	LABEL_MISSING,		/* not initialized */
-	LABEL_INITIALIZED,	/* inizialized */
-	LABEL_INVALID		/* invalid */
+	LABEL_INVALID,		/* invalid or not initialized */
+	LABEL_INITIALIZED	/* initialized */
 };
 
 struct inode_security_struct {
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index ebda973..89df646 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2696,7 +2696,7 @@
 	return rc;
 }
 
-int security_get_bool_value(int bool)
+int security_get_bool_value(int index)
 {
 	int rc;
 	int len;
@@ -2705,10 +2705,10 @@
 
 	rc = -EFAULT;
 	len = policydb.p_bools.nprim;
-	if (bool >= len)
+	if (index >= len)
 		goto out;
 
-	rc = policydb.bool_val_to_struct[bool]->state;
+	rc = policydb.bool_val_to_struct[index]->state;
 out:
 	read_unlock(&policy_rwlock);
 	return rc;
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index cb6ed10..9b756b1 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -18,6 +18,7 @@
 #include <linux/prctl.h>
 #include <linux/ratelimit.h>
 #include <linux/workqueue.h>
+#include <linux/string_helpers.h>
 
 #define YAMA_SCOPE_DISABLED	0
 #define YAMA_SCOPE_RELATIONAL	1
@@ -41,6 +42,22 @@
 static void yama_relation_cleanup(struct work_struct *work);
 static DECLARE_WORK(yama_relation_work, yama_relation_cleanup);
 
+static void report_access(const char *access, struct task_struct *target,
+			  struct task_struct *agent)
+{
+	char *target_cmd, *agent_cmd;
+
+	target_cmd = kstrdup_quotable_cmdline(target, GFP_ATOMIC);
+	agent_cmd = kstrdup_quotable_cmdline(agent, GFP_ATOMIC);
+
+	pr_notice_ratelimited(
+		"ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n",
+		access, target_cmd, target->pid, agent_cmd, agent->pid);
+
+	kfree(agent_cmd);
+	kfree(target_cmd);
+}
+
 /**
  * yama_relation_cleanup - remove invalid entries from the relation list
  *
@@ -307,11 +324,8 @@
 		}
 	}
 
-	if (rc && (mode & PTRACE_MODE_NOAUDIT) == 0) {
-		printk_ratelimited(KERN_NOTICE
-			"ptrace of pid %d was attempted by: %s (pid %d)\n",
-			child->pid, current->comm, current->pid);
-	}
+	if (rc && (mode & PTRACE_MODE_NOAUDIT) == 0)
+		report_access("attach", child, current);
 
 	return rc;
 }
@@ -337,11 +351,8 @@
 		break;
 	}
 
-	if (rc) {
-		printk_ratelimited(KERN_NOTICE
-			"ptraceme of pid %d was attempted by: %s (pid %d)\n",
-			current->pid, parent->comm, parent->pid);
-	}
+	if (rc)
+		report_access("traceme", current, parent);
 
 	return rc;
 }
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index 6d12ca9..9749f9e 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -141,35 +141,6 @@
 	  Say Y here to use the HR-timer backend as the default sequencer
 	  timer.
 
-config SND_RTCTIMER
-	tristate "RTC Timer support"
-	depends on RTC
-	select SND_TIMER
-	help
-	  Say Y here to enable RTC timer support for ALSA.  ALSA uses
-	  the RTC timer as a precise timing source and maps the RTC
-	  timer to ALSA's timer interface.  The ALSA sequencer code also
-	  can use this timing source.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called snd-rtctimer.
-
-	  Note that this option is exclusive with the new RTC drivers
-	  (CONFIG_RTC_CLASS) since this requires the old API.
-
-config SND_SEQ_RTCTIMER_DEFAULT
-	bool "Use RTC as default sequencer timer"
-	depends on SND_RTCTIMER && SND_SEQUENCER
-	depends on !SND_SEQ_HRTIMER_DEFAULT
-	default y
-	help
-	  Say Y here to use the RTC timer as the default sequencer
-	  timer.  This is strongly recommended because it ensures
-	  precise MIDI timing even when the system timer runs at less
-	  than 1000 Hz.
-
-	  If in doubt, say Y.
-
 config SND_DYNAMIC_MINORS
 	bool "Dynamic device file minor numbers"
 	help
diff --git a/sound/core/Makefile b/sound/core/Makefile
index 48ab4b8..e85d9dd 100644
--- a/sound/core/Makefile
+++ b/sound/core/Makefile
@@ -37,7 +37,6 @@
 obj-$(CONFIG_SND_HWDEP)		+= snd-hwdep.o
 obj-$(CONFIG_SND_TIMER)		+= snd-timer.o
 obj-$(CONFIG_SND_HRTIMER)	+= snd-hrtimer.o
-obj-$(CONFIG_SND_RTCTIMER)	+= snd-rtctimer.o
 obj-$(CONFIG_SND_PCM)		+= snd-pcm.o
 obj-$(CONFIG_SND_DMAENGINE_PCM)	+= snd-pcm-dmaengine.o
 obj-$(CONFIG_SND_RAWMIDI)	+= snd-rawmidi.o
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index a9933c0..9b3334b 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -288,9 +288,12 @@
 	stream = &data->stream;
 	mutex_lock(&stream->device->lock);
 	/* write is allowed when stream is running or has been steup */
-	if (stream->runtime->state != SNDRV_PCM_STATE_SETUP &&
-	    stream->runtime->state != SNDRV_PCM_STATE_PREPARED &&
-			stream->runtime->state != SNDRV_PCM_STATE_RUNNING) {
+	switch (stream->runtime->state) {
+	case SNDRV_PCM_STATE_SETUP:
+	case SNDRV_PCM_STATE_PREPARED:
+	case SNDRV_PCM_STATE_RUNNING:
+		break;
+	default:
 		mutex_unlock(&stream->device->lock);
 		return -EBADFD;
 	}
@@ -391,14 +394,13 @@
 	int retval = 0;
 
 	if (snd_BUG_ON(!data))
-		return -EFAULT;
+		return POLLERR;
+
 	stream = &data->stream;
-	if (snd_BUG_ON(!stream))
-		return -EFAULT;
 
 	mutex_lock(&stream->device->lock);
 	if (stream->runtime->state == SNDRV_PCM_STATE_OPEN) {
-		retval = -EBADFD;
+		retval = snd_compr_get_poll(stream) | POLLERR;
 		goto out;
 	}
 	poll_wait(f, &stream->runtime->sleep, wait);
@@ -421,10 +423,7 @@
 			retval = snd_compr_get_poll(stream);
 		break;
 	default:
-		if (stream->direction == SND_COMPRESS_PLAYBACK)
-			retval = POLLOUT | POLLWRNORM | POLLERR;
-		else
-			retval = POLLIN | POLLRDNORM | POLLERR;
+		retval = snd_compr_get_poll(stream) | POLLERR;
 		break;
 	}
 out:
@@ -802,9 +801,9 @@
 
 	if (snd_BUG_ON(!data))
 		return -EFAULT;
+
 	stream = &data->stream;
-	if (snd_BUG_ON(!stream))
-		return -EFAULT;
+
 	mutex_lock(&stream->device->lock);
 	switch (_IOC_NR(cmd)) {
 	case _IOC_NR(SNDRV_COMPRESS_IOCTL_VERSION):
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c
index 656d9a9..e2f2702 100644
--- a/sound/core/hrtimer.c
+++ b/sound/core/hrtimer.c
@@ -38,37 +38,53 @@
 struct snd_hrtimer {
 	struct snd_timer *timer;
 	struct hrtimer hrt;
-	atomic_t running;
+	bool in_callback;
 };
 
 static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt)
 {
 	struct snd_hrtimer *stime = container_of(hrt, struct snd_hrtimer, hrt);
 	struct snd_timer *t = stime->timer;
-	unsigned long oruns;
+	ktime_t delta;
+	unsigned long ticks;
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
 
-	if (!atomic_read(&stime->running))
-		return HRTIMER_NORESTART;
+	spin_lock(&t->lock);
+	if (!t->running)
+		goto out; /* fast path */
+	stime->in_callback = true;
+	ticks = t->sticks;
+	spin_unlock(&t->lock);
 
-	oruns = hrtimer_forward_now(hrt, ns_to_ktime(t->sticks * resolution));
-	snd_timer_interrupt(stime->timer, t->sticks * oruns);
+	/* calculate the drift */
+	delta = ktime_sub(hrt->base->get_time(), hrtimer_get_expires(hrt));
+	if (delta.tv64 > 0)
+		ticks += ktime_divns(delta, ticks * resolution);
 
-	if (!atomic_read(&stime->running))
-		return HRTIMER_NORESTART;
-	return HRTIMER_RESTART;
+	snd_timer_interrupt(stime->timer, ticks);
+
+	spin_lock(&t->lock);
+	if (t->running) {
+		hrtimer_add_expires_ns(hrt, t->sticks * resolution);
+		ret = HRTIMER_RESTART;
+	}
+
+	stime->in_callback = false;
+ out:
+	spin_unlock(&t->lock);
+	return ret;
 }
 
 static int snd_hrtimer_open(struct snd_timer *t)
 {
 	struct snd_hrtimer *stime;
 
-	stime = kmalloc(sizeof(*stime), GFP_KERNEL);
+	stime = kzalloc(sizeof(*stime), GFP_KERNEL);
 	if (!stime)
 		return -ENOMEM;
 	hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	stime->timer = t;
 	stime->hrt.function = snd_hrtimer_callback;
-	atomic_set(&stime->running, 0);
 	t->private_data = stime;
 	return 0;
 }
@@ -78,6 +94,11 @@
 	struct snd_hrtimer *stime = t->private_data;
 
 	if (stime) {
+		spin_lock_irq(&t->lock);
+		t->running = 0; /* just to be sure */
+		stime->in_callback = 1; /* skip start/stop */
+		spin_unlock_irq(&t->lock);
+
 		hrtimer_cancel(&stime->hrt);
 		kfree(stime);
 		t->private_data = NULL;
@@ -89,18 +110,19 @@
 {
 	struct snd_hrtimer *stime = t->private_data;
 
-	atomic_set(&stime->running, 0);
-	hrtimer_try_to_cancel(&stime->hrt);
+	if (stime->in_callback)
+		return 0;
 	hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution),
 		      HRTIMER_MODE_REL);
-	atomic_set(&stime->running, 1);
 	return 0;
 }
 
 static int snd_hrtimer_stop(struct snd_timer *t)
 {
 	struct snd_hrtimer *stime = t->private_data;
-	atomic_set(&stime->running, 0);
+
+	if (stime->in_callback)
+		return 0;
 	hrtimer_try_to_cancel(&stime->hrt);
 	return 0;
 }
diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c
index 697c166..8eb58c7 100644
--- a/sound/core/pcm_dmaengine.c
+++ b/sound/core/pcm_dmaengine.c
@@ -106,8 +106,9 @@
  * direction of the substream. If the substream is a playback stream the dst
  * fields will be initialized, if it is a capture stream the src fields will be
  * initialized. The {dst,src}_addr_width field will only be initialized if the
- * addr_width field of the DAI DMA data struct is not equal to
- * DMA_SLAVE_BUSWIDTH_UNDEFINED.
+ * SND_DMAENGINE_PCM_DAI_FLAG_PACK flag is set or if the addr_width field of
+ * the DAI DMA data struct is not equal to DMA_SLAVE_BUSWIDTH_UNDEFINED. If
+ * both conditions are met the latter takes priority.
  */
 void snd_dmaengine_pcm_set_config_from_dai_data(
 	const struct snd_pcm_substream *substream,
@@ -117,11 +118,17 @@
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		slave_config->dst_addr = dma_data->addr;
 		slave_config->dst_maxburst = dma_data->maxburst;
+		if (dma_data->flags & SND_DMAENGINE_PCM_DAI_FLAG_PACK)
+			slave_config->dst_addr_width =
+				DMA_SLAVE_BUSWIDTH_UNDEFINED;
 		if (dma_data->addr_width != DMA_SLAVE_BUSWIDTH_UNDEFINED)
 			slave_config->dst_addr_width = dma_data->addr_width;
 	} else {
 		slave_config->src_addr = dma_data->addr;
 		slave_config->src_maxburst = dma_data->maxburst;
+		if (dma_data->flags & SND_DMAENGINE_PCM_DAI_FLAG_PACK)
+			slave_config->src_addr_width =
+				DMA_SLAVE_BUSWIDTH_UNDEFINED;
 		if (dma_data->addr_width != DMA_SLAVE_BUSWIDTH_UNDEFINED)
 			slave_config->src_addr_width = dma_data->addr_width;
 	}
diff --git a/sound/core/pcm_iec958.c b/sound/core/pcm_iec958.c
index 36b2d7a..5e6aed6 100644
--- a/sound/core/pcm_iec958.c
+++ b/sound/core/pcm_iec958.c
@@ -9,30 +9,18 @@
 #include <linux/types.h>
 #include <sound/asoundef.h>
 #include <sound/pcm.h>
+#include <sound/pcm_params.h>
 #include <sound/pcm_iec958.h>
 
-/**
- * snd_pcm_create_iec958_consumer - create consumer format IEC958 channel status
- * @runtime: pcm runtime structure with ->rate filled in
- * @cs: channel status buffer, at least four bytes
- * @len: length of channel status buffer
- *
- * Create the consumer format channel status data in @cs of maximum size
- * @len corresponding to the parameters of the PCM runtime @runtime.
- *
- * Drivers may wish to tweak the contents of the buffer after creation.
- *
- * Returns: length of buffer, or negative error code if something failed.
- */
-int snd_pcm_create_iec958_consumer(struct snd_pcm_runtime *runtime, u8 *cs,
-	size_t len)
+static int create_iec958_consumer(uint rate, uint sample_width,
+				  u8 *cs, size_t len)
 {
 	unsigned int fs, ws;
 
 	if (len < 4)
 		return -EINVAL;
 
-	switch (runtime->rate) {
+	switch (rate) {
 	case 32000:
 		fs = IEC958_AES3_CON_FS_32000;
 		break;
@@ -59,7 +47,7 @@
 	}
 
 	if (len > 4) {
-		switch (snd_pcm_format_width(runtime->format)) {
+		switch (sample_width) {
 		case 16:
 			ws = IEC958_AES4_CON_WORDLEN_20_16;
 			break;
@@ -71,6 +59,7 @@
 			     IEC958_AES4_CON_MAX_WORDLEN_24;
 			break;
 		case 24:
+		case 32: /* Assume 24-bit width for 32-bit samples. */
 			ws = IEC958_AES4_CON_WORDLEN_24_20 |
 			     IEC958_AES4_CON_MAX_WORDLEN_24;
 			break;
@@ -92,4 +81,46 @@
 
 	return len;
 }
+
+/**
+ * snd_pcm_create_iec958_consumer - create consumer format IEC958 channel status
+ * @runtime: pcm runtime structure with ->rate filled in
+ * @cs: channel status buffer, at least four bytes
+ * @len: length of channel status buffer
+ *
+ * Create the consumer format channel status data in @cs of maximum size
+ * @len corresponding to the parameters of the PCM runtime @runtime.
+ *
+ * Drivers may wish to tweak the contents of the buffer after creation.
+ *
+ * Returns: length of buffer, or negative error code if something failed.
+ */
+int snd_pcm_create_iec958_consumer(struct snd_pcm_runtime *runtime, u8 *cs,
+	size_t len)
+{
+	return create_iec958_consumer(runtime->rate,
+				      snd_pcm_format_width(runtime->format),
+				      cs, len);
+}
 EXPORT_SYMBOL(snd_pcm_create_iec958_consumer);
+
+/**
+ * snd_pcm_create_iec958_consumer_hw_params - create IEC958 channel status
+ * @params: the hw_params instance for extracting rate and sample format
+ * @cs: channel status buffer, at least four bytes
+ * @len: length of channel status buffer
+ *
+ * Create the consumer format channel status data in @cs of maximum size
+ * @len corresponding to the rate and sample format stored in @params.
+ *
+ * Drivers may wish to tweak the contents of the buffer after creation.
+ *
+ * Returns: length of buffer, or negative error code if something failed.
+ */
+int snd_pcm_create_iec958_consumer_hw_params(struct snd_pcm_hw_params *params,
+					     u8 *cs, size_t len)
+{
+	return create_iec958_consumer(params_rate(params), params_width(params),
+				      cs, len);
+}
+EXPORT_SYMBOL(snd_pcm_create_iec958_consumer_hw_params);
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 3a9b66c..bb12615 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1886,8 +1886,8 @@
 		snd_timer_interrupt(substream->timer, 1);
 #endif
  _end:
-	snd_pcm_stream_unlock_irqrestore(substream, flags);
 	kill_fasync(&runtime->fasync, SIGIO, POLL_IN);
+	snd_pcm_stream_unlock_irqrestore(substream, flags);
 }
 
 EXPORT_SYMBOL(snd_pcm_period_elapsed);
@@ -2595,6 +2595,8 @@
 	};
 	int err;
 
+	if (WARN_ON(pcm->streams[stream].chmap_kctl))
+		return -EBUSY;
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 9106d8e..c61fd50 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3161,7 +3161,7 @@
 
 	substream = pcm_file->substream;
 	if (PCM_RUNTIME_CHECK(substream))
-		return -ENXIO;
+		return POLLOUT | POLLWRNORM | POLLERR;
 	runtime = substream->runtime;
 
 	poll_wait(file, &runtime->sleep, wait);
@@ -3200,7 +3200,7 @@
 
 	substream = pcm_file->substream;
 	if (PCM_RUNTIME_CHECK(substream))
-		return -ENXIO;
+		return POLLIN | POLLRDNORM | POLLERR;
 	runtime = substream->runtime;
 
 	poll_wait(file, &runtime->sleep, wait);
diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c
deleted file mode 100644
index f3420d1..0000000
--- a/sound/core/rtctimer.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- *  RTC based high-frequency timer
- *
- *  Copyright (C) 2000 Takashi Iwai
- *	based on rtctimer.c by Steve Ratcliffe
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/log2.h>
-#include <sound/core.h>
-#include <sound/timer.h>
-
-#if IS_ENABLED(CONFIG_RTC)
-
-#include <linux/mc146818rtc.h>
-
-#define RTC_FREQ	1024		/* default frequency */
-#define NANO_SEC	1000000000L	/* 10^9 in sec */
-
-/*
- * prototypes
- */
-static int rtctimer_open(struct snd_timer *t);
-static int rtctimer_close(struct snd_timer *t);
-static int rtctimer_start(struct snd_timer *t);
-static int rtctimer_stop(struct snd_timer *t);
-
-
-/*
- * The hardware dependent description for this timer.
- */
-static struct snd_timer_hardware rtc_hw = {
-	.flags =	SNDRV_TIMER_HW_AUTO |
-			SNDRV_TIMER_HW_FIRST |
-			SNDRV_TIMER_HW_TASKLET,
-	.ticks =	100000000L,		/* FIXME: XXX */
-	.open =		rtctimer_open,
-	.close =	rtctimer_close,
-	.start =	rtctimer_start,
-	.stop =		rtctimer_stop,
-};
-
-static int rtctimer_freq = RTC_FREQ;		/* frequency */
-static struct snd_timer *rtctimer;
-static struct tasklet_struct rtc_tasklet;
-static rtc_task_t rtc_task;
-
-
-static int
-rtctimer_open(struct snd_timer *t)
-{
-	int err;
-
-	err = rtc_register(&rtc_task);
-	if (err < 0)
-		return err;
-	t->private_data = &rtc_task;
-	return 0;
-}
-
-static int
-rtctimer_close(struct snd_timer *t)
-{
-	rtc_task_t *rtc = t->private_data;
-	if (rtc) {
-		rtc_unregister(rtc);
-		tasklet_kill(&rtc_tasklet);
-		t->private_data = NULL;
-	}
-	return 0;
-}
-
-static int
-rtctimer_start(struct snd_timer *timer)
-{
-	rtc_task_t *rtc = timer->private_data;
-	if (snd_BUG_ON(!rtc))
-		return -EINVAL;
-	rtc_control(rtc, RTC_IRQP_SET, rtctimer_freq);
-	rtc_control(rtc, RTC_PIE_ON, 0);
-	return 0;
-}
-
-static int
-rtctimer_stop(struct snd_timer *timer)
-{
-	rtc_task_t *rtc = timer->private_data;
-	if (snd_BUG_ON(!rtc))
-		return -EINVAL;
-	rtc_control(rtc, RTC_PIE_OFF, 0);
-	return 0;
-}
-
-static void rtctimer_tasklet(unsigned long data)
-{
-	snd_timer_interrupt((struct snd_timer *)data, 1);
-}
-
-/*
- * interrupt
- */
-static void rtctimer_interrupt(void *private_data)
-{
-	tasklet_schedule(private_data);
-}
-
-
-/*
- *  ENTRY functions
- */
-static int __init rtctimer_init(void)
-{
-	int err;
-	struct snd_timer *timer;
-
-	if (rtctimer_freq < 2 || rtctimer_freq > 8192 ||
-	    !is_power_of_2(rtctimer_freq)) {
-		pr_err("ALSA: rtctimer: invalid frequency %d\n", rtctimer_freq);
-		return -EINVAL;
-	}
-
-	/* Create a new timer and set up the fields */
-	err = snd_timer_global_new("rtc", SNDRV_TIMER_GLOBAL_RTC, &timer);
-	if (err < 0)
-		return err;
-
-	timer->module = THIS_MODULE;
-	strcpy(timer->name, "RTC timer");
-	timer->hw = rtc_hw;
-	timer->hw.resolution = NANO_SEC / rtctimer_freq;
-
-	tasklet_init(&rtc_tasklet, rtctimer_tasklet, (unsigned long)timer);
-
-	/* set up RTC callback */
-	rtc_task.func = rtctimer_interrupt;
-	rtc_task.private_data = &rtc_tasklet;
-
-	err = snd_timer_global_register(timer);
-	if (err < 0) {
-		snd_timer_global_free(timer);
-		return err;
-	}
-	rtctimer = timer; /* remember this */
-
-	return 0;
-}
-
-static void __exit rtctimer_exit(void)
-{
-	if (rtctimer) {
-		snd_timer_global_free(rtctimer);
-		rtctimer = NULL;
-	}
-}
-
-
-/*
- * exported stuff
- */
-module_init(rtctimer_init)
-module_exit(rtctimer_exit)
-
-module_param(rtctimer_freq, int, 0444);
-MODULE_PARM_DESC(rtctimer_freq, "timer frequency in Hz");
-
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("snd-timer-" __stringify(SNDRV_TIMER_GLOBAL_RTC));
-
-#endif /* IS_ENABLED(CONFIG_RTC) */
diff --git a/sound/core/seq/seq.c b/sound/core/seq/seq.c
index 7e0aabb..639544b 100644
--- a/sound/core/seq/seq.c
+++ b/sound/core/seq/seq.c
@@ -47,8 +47,6 @@
 int seq_default_timer_device =
 #ifdef CONFIG_SND_SEQ_HRTIMER_DEFAULT
 	SNDRV_TIMER_GLOBAL_HRTIMER
-#elif defined(CONFIG_SND_SEQ_RTCTIMER_DEFAULT)
-	SNDRV_TIMER_GLOBAL_RTC
 #else
 	SNDRV_TIMER_GLOBAL_SYSTEM
 #endif
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 6469bed..e722022 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -37,8 +37,6 @@
 
 #if IS_ENABLED(CONFIG_SND_HRTIMER)
 #define DEFAULT_TIMER_LIMIT 4
-#elif IS_ENABLED(CONFIG_SND_RTCTIMER)
-#define DEFAULT_TIMER_LIMIT 2
 #else
 #define DEFAULT_TIMER_LIMIT 1
 #endif
@@ -1225,6 +1223,7 @@
 		tu->tstamp = *tstamp;
 	if ((tu->filter & (1 << event)) == 0 || !tu->tread)
 		return;
+	memset(&r1, 0, sizeof(r1));
 	r1.event = event;
 	r1.tstamp = *tstamp;
 	r1.val = resolution;
@@ -1267,6 +1266,7 @@
 	}
 	if ((tu->filter & (1 << SNDRV_TIMER_EVENT_RESOLUTION)) &&
 	    tu->last_resolution != resolution) {
+		memset(&r1, 0, sizeof(r1));
 		r1.event = SNDRV_TIMER_EVENT_RESOLUTION;
 		r1.tstamp = tstamp;
 		r1.val = resolution;
@@ -1739,6 +1739,7 @@
 	if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) {
 		if (tu->tread) {
 			struct snd_timer_tread tread;
+			memset(&tread, 0, sizeof(tread));
 			tread.event = SNDRV_TIMER_EVENT_EARLY;
 			tread.tstamp.tv_sec = 0;
 			tread.tstamp.tv_nsec = 0;
diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig
index 2a779c2..ab894ed 100644
--- a/sound/firewire/Kconfig
+++ b/sound/firewire/Kconfig
@@ -134,6 +134,7 @@
 	 Say Y here to include support for TASCAM.
 	  * FW-1884
 	  * FW-1082
+	  * FW-1804
 
 	 To compile this driver as a module, choose M here: the module
 	 will be called snd-firewire-tascam.
diff --git a/sound/firewire/Makefile b/sound/firewire/Makefile
index 003c090..0ee1fb1 100644
--- a/sound/firewire/Makefile
+++ b/sound/firewire/Makefile
@@ -1,3 +1,6 @@
+# To find a header included by define_trace.h.
+CFLAGS_amdtp-stream.o	:= -I$(src)
+
 snd-firewire-lib-objs := lib.o iso-resources.o packets-buffer.o \
 			 fcp.o cmp.o amdtp-stream.o amdtp-am824.o
 snd-isight-objs := isight.o
diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h
new file mode 100644
index 0000000..9c04faf
--- /dev/null
+++ b/sound/firewire/amdtp-stream-trace.h
@@ -0,0 +1,110 @@
+/*
+ * amdtp-stream-trace.h - tracepoint definitions to dump a part of packet data
+ *
+ * Copyright (c) 2016 Takashi Sakamoto
+ * Licensed under the terms of the GNU General Public License, version 2.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM		snd_firewire_lib
+
+#if !defined(_AMDTP_STREAM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _AMDTP_STREAM_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(in_packet,
+	TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_quadlets, unsigned int index),
+	TP_ARGS(s, cycles, cip_header, payload_quadlets, index),
+	TP_STRUCT__entry(
+		__field(unsigned int, second)
+		__field(unsigned int, cycle)
+		__field(int, channel)
+		__field(int, src)
+		__field(int, dest)
+		__field(u32, cip_header0)
+		__field(u32, cip_header1)
+		__field(unsigned int, payload_quadlets)
+		__field(unsigned int, packet_index)
+		__field(unsigned int, irq)
+		__field(unsigned int, index)
+	),
+	TP_fast_assign(
+		__entry->second = cycles / CYCLES_PER_SECOND;
+		__entry->cycle = cycles % CYCLES_PER_SECOND;
+		__entry->channel = s->context->channel;
+		__entry->src = fw_parent_device(s->unit)->node_id;
+		__entry->dest = fw_parent_device(s->unit)->card->node_id;
+		__entry->cip_header0 = cip_header[0];
+		__entry->cip_header1 = cip_header[1];
+		__entry->payload_quadlets = payload_quadlets;
+		__entry->packet_index = s->packet_index;
+		__entry->irq = !!in_interrupt();
+		__entry->index = index;
+	),
+	TP_printk(
+		"%02u %04u %04x %04x %02d %08x %08x %03u %02u %01u %02u",
+		__entry->second,
+		__entry->cycle,
+		__entry->src,
+		__entry->dest,
+		__entry->channel,
+		__entry->cip_header0,
+		__entry->cip_header1,
+		__entry->payload_quadlets,
+		__entry->packet_index,
+		__entry->irq,
+		__entry->index)
+);
+
+TRACE_EVENT(out_packet,
+	TP_PROTO(const struct amdtp_stream *s, u32 cycles, __be32 *cip_header, unsigned int payload_length, unsigned int index),
+	TP_ARGS(s, cycles, cip_header, payload_length, index),
+	TP_STRUCT__entry(
+		__field(unsigned int, second)
+		__field(unsigned int, cycle)
+		__field(int, channel)
+		__field(int, src)
+		__field(int, dest)
+		__field(u32, cip_header0)
+		__field(u32, cip_header1)
+		__field(unsigned int, payload_quadlets)
+		__field(unsigned int, packet_index)
+		__field(unsigned int, irq)
+		__field(unsigned int, index)
+	),
+	TP_fast_assign(
+		__entry->second = cycles / CYCLES_PER_SECOND;
+		__entry->cycle = cycles % CYCLES_PER_SECOND;
+		__entry->channel = s->context->channel;
+		__entry->src = fw_parent_device(s->unit)->card->node_id;
+		__entry->dest = fw_parent_device(s->unit)->node_id;
+		__entry->cip_header0 = be32_to_cpu(cip_header[0]);
+		__entry->cip_header1 = be32_to_cpu(cip_header[1]);
+		__entry->payload_quadlets = payload_length / 4;
+		__entry->packet_index = s->packet_index;
+		__entry->irq = !!in_interrupt();
+		__entry->index = index;
+	),
+	TP_printk(
+		"%02u %04u %04x %04x %02d %08x %08x %03u %02u %01u %02u",
+		__entry->second,
+		__entry->cycle,
+		__entry->src,
+		__entry->dest,
+		__entry->channel,
+		__entry->cip_header0,
+		__entry->cip_header1,
+		__entry->payload_quadlets,
+		__entry->packet_index,
+		__entry->irq,
+		__entry->index)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH	.
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE	amdtp-stream-trace
+#include <trace/define_trace.h>
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index ed29026..00060c4 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -19,6 +19,10 @@
 #define CYCLES_PER_SECOND	8000
 #define TICKS_PER_SECOND	(TICKS_PER_CYCLE * CYCLES_PER_SECOND)
 
+/* Always support Linux tracing subsystem. */
+#define CREATE_TRACE_POINTS
+#include "amdtp-stream-trace.h"
+
 #define TRANSFER_DELAY_TICKS	0x2e00 /* 479.17 microseconds */
 
 /* isochronous header parameters */
@@ -87,7 +91,6 @@
 
 	init_waitqueue_head(&s->callback_wait);
 	s->callbacked = false;
-	s->sync_slave = NULL;
 
 	s->fmt = fmt;
 	s->process_data_blocks = process_data_blocks;
@@ -102,6 +105,10 @@
  */
 void amdtp_stream_destroy(struct amdtp_stream *s)
 {
+	/* Not initialized. */
+	if (s->protocol == NULL)
+		return;
+
 	WARN_ON(amdtp_stream_running(s));
 	kfree(s->protocol);
 	mutex_destroy(&s->mutex);
@@ -244,7 +251,6 @@
 	tasklet_kill(&s->period_tasklet);
 	s->pcm_buffer_pointer = 0;
 	s->pcm_period_pointer = 0;
-	s->pointer_flush = true;
 }
 EXPORT_SYMBOL(amdtp_stream_pcm_prepare);
 
@@ -349,7 +355,6 @@
 	s->pcm_period_pointer += frames;
 	if (s->pcm_period_pointer >= pcm->runtime->period_size) {
 		s->pcm_period_pointer -= pcm->runtime->period_size;
-		s->pointer_flush = false;
 		tasklet_hi_schedule(&s->period_tasklet);
 	}
 }
@@ -363,9 +368,8 @@
 		snd_pcm_period_elapsed(pcm);
 }
 
-static int queue_packet(struct amdtp_stream *s,
-			unsigned int header_length,
-			unsigned int payload_length, bool skip)
+static int queue_packet(struct amdtp_stream *s, unsigned int header_length,
+			unsigned int payload_length)
 {
 	struct fw_iso_packet p = {0};
 	int err = 0;
@@ -376,8 +380,10 @@
 	p.interrupt = IS_ALIGNED(s->packet_index + 1, INTERRUPT_INTERVAL);
 	p.tag = TAG_CIP;
 	p.header_length = header_length;
-	p.payload_length = (!skip) ? payload_length : 0;
-	p.skip = skip;
+	if (payload_length > 0)
+		p.payload_length = payload_length;
+	else
+		p.skip = true;
 	err = fw_iso_context_queue(s->context, &p, &s->buffer.iso_buffer,
 				   s->buffer.packets[s->packet_index].offset);
 	if (err < 0) {
@@ -392,27 +398,30 @@
 }
 
 static inline int queue_out_packet(struct amdtp_stream *s,
-				   unsigned int payload_length, bool skip)
+				   unsigned int payload_length)
 {
-	return queue_packet(s, OUT_PACKET_HEADER_SIZE,
-			    payload_length, skip);
+	return queue_packet(s, OUT_PACKET_HEADER_SIZE, payload_length);
 }
 
 static inline int queue_in_packet(struct amdtp_stream *s)
 {
 	return queue_packet(s, IN_PACKET_HEADER_SIZE,
-			    amdtp_stream_get_max_payload(s), false);
+			    amdtp_stream_get_max_payload(s));
 }
 
-static int handle_out_packet(struct amdtp_stream *s, unsigned int data_blocks,
-			     unsigned int syt)
+static int handle_out_packet(struct amdtp_stream *s, unsigned int cycle,
+			     unsigned int index)
 {
 	__be32 *buffer;
+	unsigned int syt;
+	unsigned int data_blocks;
 	unsigned int payload_length;
 	unsigned int pcm_frames;
 	struct snd_pcm_substream *pcm;
 
 	buffer = s->buffer.packets[s->packet_index].buffer;
+	syt = calculate_syt(s, cycle);
+	data_blocks = calculate_data_blocks(s, syt);
 	pcm_frames = s->process_data_blocks(s, buffer + 2, data_blocks, &syt);
 
 	buffer[0] = cpu_to_be32(ACCESS_ONCE(s->source_node_id_field) |
@@ -424,9 +433,11 @@
 				(syt & CIP_SYT_MASK));
 
 	s->data_block_counter = (s->data_block_counter + data_blocks) & 0xff;
-
 	payload_length = 8 + data_blocks * 4 * s->data_block_quadlets;
-	if (queue_out_packet(s, payload_length, false) < 0)
+
+	trace_out_packet(s, cycle, buffer, payload_length, index);
+
+	if (queue_out_packet(s, payload_length) < 0)
 		return -EIO;
 
 	pcm = ACCESS_ONCE(s->pcm);
@@ -438,19 +449,24 @@
 }
 
 static int handle_in_packet(struct amdtp_stream *s,
-			    unsigned int payload_quadlets, __be32 *buffer,
-			    unsigned int *data_blocks, unsigned int syt)
+			    unsigned int payload_quadlets, unsigned int cycle,
+			    unsigned int index)
 {
+	__be32 *buffer;
 	u32 cip_header[2];
-	unsigned int fmt, fdf;
+	unsigned int fmt, fdf, syt;
 	unsigned int data_block_quadlets, data_block_counter, dbc_interval;
+	unsigned int data_blocks;
 	struct snd_pcm_substream *pcm;
 	unsigned int pcm_frames;
 	bool lost;
 
+	buffer = s->buffer.packets[s->packet_index].buffer;
 	cip_header[0] = be32_to_cpu(buffer[0]);
 	cip_header[1] = be32_to_cpu(buffer[1]);
 
+	trace_in_packet(s, cycle, cip_header, payload_quadlets, index);
+
 	/*
 	 * This module supports 'Two-quadlet CIP header with SYT field'.
 	 * For convenience, also check FMT field is AM824 or not.
@@ -460,7 +476,7 @@
 		dev_info_ratelimited(&s->unit->device,
 				"Invalid CIP header for AMDTP: %08X:%08X\n",
 				cip_header[0], cip_header[1]);
-		*data_blocks = 0;
+		data_blocks = 0;
 		pcm_frames = 0;
 		goto end;
 	}
@@ -471,7 +487,7 @@
 		dev_info_ratelimited(&s->unit->device,
 				     "Detect unexpected protocol: %08x %08x\n",
 				     cip_header[0], cip_header[1]);
-		*data_blocks = 0;
+		data_blocks = 0;
 		pcm_frames = 0;
 		goto end;
 	}
@@ -480,7 +496,7 @@
 	fdf = (cip_header[1] & CIP_FDF_MASK) >> CIP_FDF_SHIFT;
 	if (payload_quadlets < 3 ||
 	    (fmt == CIP_FMT_AM && fdf == AMDTP_FDF_NO_DATA)) {
-		*data_blocks = 0;
+		data_blocks = 0;
 	} else {
 		data_block_quadlets =
 			(cip_header[0] & CIP_DBS_MASK) >> CIP_DBS_SHIFT;
@@ -494,12 +510,12 @@
 		if (s->flags & CIP_WRONG_DBS)
 			data_block_quadlets = s->data_block_quadlets;
 
-		*data_blocks = (payload_quadlets - 2) / data_block_quadlets;
+		data_blocks = (payload_quadlets - 2) / data_block_quadlets;
 	}
 
 	/* Check data block counter continuity */
 	data_block_counter = cip_header[0] & CIP_DBC_MASK;
-	if (*data_blocks == 0 && (s->flags & CIP_EMPTY_HAS_WRONG_DBC) &&
+	if (data_blocks == 0 && (s->flags & CIP_EMPTY_HAS_WRONG_DBC) &&
 	    s->data_block_counter != UINT_MAX)
 		data_block_counter = s->data_block_counter;
 
@@ -510,10 +526,10 @@
 	} else if (!(s->flags & CIP_DBC_IS_END_EVENT)) {
 		lost = data_block_counter != s->data_block_counter;
 	} else {
-		if ((*data_blocks > 0) && (s->tx_dbc_interval > 0))
+		if (data_blocks > 0 && s->tx_dbc_interval > 0)
 			dbc_interval = s->tx_dbc_interval;
 		else
-			dbc_interval = *data_blocks;
+			dbc_interval = data_blocks;
 
 		lost = data_block_counter !=
 		       ((s->data_block_counter + dbc_interval) & 0xff);
@@ -526,13 +542,14 @@
 		return -EIO;
 	}
 
-	pcm_frames = s->process_data_blocks(s, buffer + 2, *data_blocks, &syt);
+	syt = be32_to_cpu(buffer[1]) & CIP_SYT_MASK;
+	pcm_frames = s->process_data_blocks(s, buffer + 2, data_blocks, &syt);
 
 	if (s->flags & CIP_DBC_IS_END_EVENT)
 		s->data_block_counter = data_block_counter;
 	else
 		s->data_block_counter =
-				(data_block_counter + *data_blocks) & 0xff;
+				(data_block_counter + data_blocks) & 0xff;
 end:
 	if (queue_in_packet(s) < 0)
 		return -EIO;
@@ -544,29 +561,50 @@
 	return 0;
 }
 
-static void out_stream_callback(struct fw_iso_context *context, u32 cycle,
+/*
+ * In CYCLE_TIMER register of IEEE 1394, 7 bits are used to represent second. On
+ * the other hand, in DMA descriptors of 1394 OHCI, 3 bits are used to represent
+ * it. Thus, via Linux firewire subsystem, we can get the 3 bits for second.
+ */
+static inline u32 compute_cycle_count(u32 tstamp)
+{
+	return (((tstamp >> 13) & 0x07) * 8000) + (tstamp & 0x1fff);
+}
+
+static inline u32 increment_cycle_count(u32 cycle, unsigned int addend)
+{
+	cycle += addend;
+	if (cycle >= 8 * CYCLES_PER_SECOND)
+		cycle -= 8 * CYCLES_PER_SECOND;
+	return cycle;
+}
+
+static inline u32 decrement_cycle_count(u32 cycle, unsigned int subtrahend)
+{
+	if (cycle < subtrahend)
+		cycle += 8 * CYCLES_PER_SECOND;
+	return cycle - subtrahend;
+}
+
+static void out_stream_callback(struct fw_iso_context *context, u32 tstamp,
 				size_t header_length, void *header,
 				void *private_data)
 {
 	struct amdtp_stream *s = private_data;
-	unsigned int i, syt, packets = header_length / 4;
-	unsigned int data_blocks;
+	unsigned int i, packets = header_length / 4;
+	u32 cycle;
 
 	if (s->packet_index < 0)
 		return;
 
-	/*
-	 * Compute the cycle of the last queued packet.
-	 * (We need only the four lowest bits for the SYT, so we can ignore
-	 * that bits 0-11 must wrap around at 3072.)
-	 */
-	cycle += QUEUE_LENGTH - packets;
+	cycle = compute_cycle_count(tstamp);
+
+	/* Align to actual cycle count for the last packet. */
+	cycle = increment_cycle_count(cycle, QUEUE_LENGTH - packets);
 
 	for (i = 0; i < packets; ++i) {
-		syt = calculate_syt(s, ++cycle);
-		data_blocks = calculate_data_blocks(s, syt);
-
-		if (handle_out_packet(s, data_blocks, syt) < 0) {
+		cycle = increment_cycle_count(cycle, 1);
+		if (handle_out_packet(s, cycle, i) < 0) {
 			s->packet_index = -1;
 			amdtp_stream_pcm_abort(s);
 			return;
@@ -576,15 +614,15 @@
 	fw_iso_context_queue_flush(s->context);
 }
 
-static void in_stream_callback(struct fw_iso_context *context, u32 cycle,
+static void in_stream_callback(struct fw_iso_context *context, u32 tstamp,
 			       size_t header_length, void *header,
 			       void *private_data)
 {
 	struct amdtp_stream *s = private_data;
-	unsigned int p, syt, packets;
+	unsigned int i, packets;
 	unsigned int payload_quadlets, max_payload_quadlets;
-	unsigned int data_blocks;
-	__be32 *buffer, *headers = header;
+	__be32 *headers = header;
+	u32 cycle;
 
 	if (s->packet_index < 0)
 		return;
@@ -592,70 +630,44 @@
 	/* The number of packets in buffer */
 	packets = header_length / IN_PACKET_HEADER_SIZE;
 
+	cycle = compute_cycle_count(tstamp);
+
+	/* Align to actual cycle count for the last packet. */
+	cycle = decrement_cycle_count(cycle, packets);
+
 	/* For buffer-over-run prevention. */
 	max_payload_quadlets = amdtp_stream_get_max_payload(s) / 4;
 
-	for (p = 0; p < packets; p++) {
-		buffer = s->buffer.packets[s->packet_index].buffer;
+	for (i = 0; i < packets; i++) {
+		cycle = increment_cycle_count(cycle, 1);
 
 		/* The number of quadlets in this packet */
 		payload_quadlets =
-			(be32_to_cpu(headers[p]) >> ISO_DATA_LENGTH_SHIFT) / 4;
+			(be32_to_cpu(headers[i]) >> ISO_DATA_LENGTH_SHIFT) / 4;
 		if (payload_quadlets > max_payload_quadlets) {
 			dev_err(&s->unit->device,
 				"Detect jumbo payload: %02x %02x\n",
 				payload_quadlets, max_payload_quadlets);
-			s->packet_index = -1;
 			break;
 		}
 
-		syt = be32_to_cpu(buffer[1]) & CIP_SYT_MASK;
-		if (handle_in_packet(s, payload_quadlets, buffer,
-						&data_blocks, syt) < 0) {
-			s->packet_index = -1;
+		if (handle_in_packet(s, payload_quadlets, cycle, i) < 0)
 			break;
-		}
-
-		/* Process sync slave stream */
-		if (s->sync_slave && s->sync_slave->callbacked) {
-			if (handle_out_packet(s->sync_slave,
-					      data_blocks, syt) < 0) {
-				s->packet_index = -1;
-				break;
-			}
-		}
 	}
 
-	/* Queueing error or detecting discontinuity */
-	if (s->packet_index < 0) {
+	/* Queueing error or detecting invalid payload. */
+	if (i < packets) {
+		s->packet_index = -1;
 		amdtp_stream_pcm_abort(s);
-
-		/* Abort sync slave. */
-		if (s->sync_slave) {
-			s->sync_slave->packet_index = -1;
-			amdtp_stream_pcm_abort(s->sync_slave);
-		}
 		return;
 	}
 
-	/* when sync to device, flush the packets for slave stream */
-	if (s->sync_slave && s->sync_slave->callbacked)
-		fw_iso_context_queue_flush(s->sync_slave->context);
-
 	fw_iso_context_queue_flush(s->context);
 }
 
-/* processing is done by master callback */
-static void slave_stream_callback(struct fw_iso_context *context, u32 cycle,
-				  size_t header_length, void *header,
-				  void *private_data)
-{
-	return;
-}
-
 /* this is executed one time */
 static void amdtp_stream_first_callback(struct fw_iso_context *context,
-					u32 cycle, size_t header_length,
+					u32 tstamp, size_t header_length,
 					void *header, void *private_data)
 {
 	struct amdtp_stream *s = private_data;
@@ -669,12 +681,10 @@
 
 	if (s->direction == AMDTP_IN_STREAM)
 		context->callback.sc = in_stream_callback;
-	else if (s->flags & CIP_SYNC_TO_DEVICE)
-		context->callback.sc = slave_stream_callback;
 	else
 		context->callback.sc = out_stream_callback;
 
-	context->callback.sc(context, cycle, header_length, header, s);
+	context->callback.sc(context, tstamp, header_length, header, s);
 }
 
 /**
@@ -713,8 +723,7 @@
 		goto err_unlock;
 	}
 
-	if (s->direction == AMDTP_IN_STREAM &&
-	    s->flags & CIP_SKIP_INIT_DBC_CHECK)
+	if (s->direction == AMDTP_IN_STREAM)
 		s->data_block_counter = UINT_MAX;
 	else
 		s->data_block_counter = 0;
@@ -755,7 +764,7 @@
 		if (s->direction == AMDTP_IN_STREAM)
 			err = queue_in_packet(s);
 		else
-			err = queue_out_packet(s, 0, true);
+			err = queue_out_packet(s, 0);
 		if (err < 0)
 			goto err_context;
 	} while (s->packet_index > 0);
@@ -794,11 +803,24 @@
  */
 unsigned long amdtp_stream_pcm_pointer(struct amdtp_stream *s)
 {
-	/* this optimization is allowed to be racy */
-	if (s->pointer_flush && amdtp_stream_running(s))
+	/*
+	 * This function is called in software IRQ context of period_tasklet or
+	 * process context.
+	 *
+	 * When the software IRQ context was scheduled by software IRQ context
+	 * of IR/IT contexts, queued packets were already handled. Therefore,
+	 * no need to flush the queue in buffer anymore.
+	 *
+	 * When the process context reaches here, some packets will already be
+	 * queued in the buffer. These packets should be handled immediately
+	 * to keep better granularity of the PCM pointer.
+	 *
+	 * Later, the process context will sometimes schedule the software IRQ
+	 * context of the period_tasklet. Then there is no need to flush the
+	 * queue, for the same reason as described for IR/IT contexts.
+	 */
+	if (!in_interrupt() && amdtp_stream_running(s))
 		fw_iso_context_flush_completions(s->context);
-	else
-		s->pointer_flush = true;
 
 	return ACCESS_ONCE(s->pcm_buffer_pointer);
 }
diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h
index 8775704..c1bc7fa 100644
--- a/sound/firewire/amdtp-stream.h
+++ b/sound/firewire/amdtp-stream.h
@@ -17,8 +17,6 @@
  * @CIP_BLOCKING: In blocking mode, each packet contains either zero or
  *	SYT_INTERVAL samples, with these two types alternating so that
  *	the overall sample rate comes out right.
- * @CIP_SYNC_TO_DEVICE: In sync to device mode, time stamp in out packets is
- *	generated by in packets. Defaultly this driver generates timestamp.
  * @CIP_EMPTY_WITH_TAG0: Only for in-stream. Empty in-packets have TAG0.
  * @CIP_DBC_IS_END_EVENT: Only for in-stream. The value of dbc in an in-packet
  *	corresponds to the end of event in the packet. Out of IEC 61883.
@@ -26,8 +24,6 @@
  *	The value of data_block_quadlets is used instead of reported value.
  * @CIP_SKIP_DBC_ZERO_CHECK: Only for in-stream.  Packets with zero in dbc is
  *	skipped for detecting discontinuity.
- * @CIP_SKIP_INIT_DBC_CHECK: Only for in-stream. The value of dbc in first
- *	packet is not continuous from an initial value.
  * @CIP_EMPTY_HAS_WRONG_DBC: Only for in-stream. The value of dbc in empty
  *	packet is wrong but the others are correct.
  * @CIP_JUMBO_PAYLOAD: Only for in-stream. The number of data blocks in an
@@ -37,14 +33,12 @@
 enum cip_flags {
 	CIP_NONBLOCKING		= 0x00,
 	CIP_BLOCKING		= 0x01,
-	CIP_SYNC_TO_DEVICE	= 0x02,
-	CIP_EMPTY_WITH_TAG0	= 0x04,
-	CIP_DBC_IS_END_EVENT	= 0x08,
-	CIP_WRONG_DBS		= 0x10,
-	CIP_SKIP_DBC_ZERO_CHECK	= 0x20,
-	CIP_SKIP_INIT_DBC_CHECK	= 0x40,
-	CIP_EMPTY_HAS_WRONG_DBC	= 0x80,
-	CIP_JUMBO_PAYLOAD	= 0x100,
+	CIP_EMPTY_WITH_TAG0	= 0x02,
+	CIP_DBC_IS_END_EVENT	= 0x04,
+	CIP_WRONG_DBS		= 0x08,
+	CIP_SKIP_DBC_ZERO_CHECK	= 0x10,
+	CIP_EMPTY_HAS_WRONG_DBC	= 0x20,
+	CIP_JUMBO_PAYLOAD	= 0x40,
 };
 
 /**
@@ -132,12 +126,10 @@
 	struct tasklet_struct period_tasklet;
 	unsigned int pcm_buffer_pointer;
 	unsigned int pcm_period_pointer;
-	bool pointer_flush;
 
 	/* To wait for first packet. */
 	bool callbacked;
 	wait_queue_head_t callback_wait;
-	struct amdtp_stream *sync_slave;
 
 	/* For backends to process data blocks. */
 	void *protocol;
@@ -223,23 +215,6 @@
 	return sfc & 1;
 }
 
-static inline void amdtp_stream_set_sync(enum cip_flags sync_mode,
-					 struct amdtp_stream *master,
-					 struct amdtp_stream *slave)
-{
-	if (sync_mode == CIP_SYNC_TO_DEVICE) {
-		master->flags |= CIP_SYNC_TO_DEVICE;
-		slave->flags |= CIP_SYNC_TO_DEVICE;
-		master->sync_slave = slave;
-	} else {
-		master->flags &= ~CIP_SYNC_TO_DEVICE;
-		slave->flags &= ~CIP_SYNC_TO_DEVICE;
-		master->sync_slave = NULL;
-	}
-
-	slave->sync_slave = NULL;
-}
-
 /**
  * amdtp_stream_wait_callback - sleep till callbacked or timeout
  * @s: the AMDTP stream
diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c
index 3e4e0756..f7e2cbd 100644
--- a/sound/firewire/bebob/bebob.c
+++ b/sound/firewire/bebob/bebob.c
@@ -67,7 +67,7 @@
 #define MODEL_MAUDIO_PROJECTMIX		0x00010091
 
 static int
-name_device(struct snd_bebob *bebob, unsigned int vendor_id)
+name_device(struct snd_bebob *bebob)
 {
 	struct fw_device *fw_dev = fw_parent_device(bebob->unit);
 	char vendor[24] = {0};
@@ -126,6 +126,17 @@
 	return err;
 }
 
+static void bebob_free(struct snd_bebob *bebob)
+{
+	snd_bebob_stream_destroy_duplex(bebob);
+	fw_unit_put(bebob->unit);
+
+	kfree(bebob->maudio_special_quirk);
+
+	mutex_destroy(&bebob->mutex);
+	kfree(bebob);
+}
+
 /*
  * This module releases the FireWire unit data after all ALSA character devices
  * are released by applications. This is for releasing stream data or finishing
@@ -137,18 +148,11 @@
 {
 	struct snd_bebob *bebob = card->private_data;
 
-	snd_bebob_stream_destroy_duplex(bebob);
-	fw_unit_put(bebob->unit);
+	mutex_lock(&devices_mutex);
+	clear_bit(bebob->card_index, devices_used);
+	mutex_unlock(&devices_mutex);
 
-	kfree(bebob->maudio_special_quirk);
-
-	if (bebob->card_index >= 0) {
-		mutex_lock(&devices_mutex);
-		clear_bit(bebob->card_index, devices_used);
-		mutex_unlock(&devices_mutex);
-	}
-
-	mutex_destroy(&bebob->mutex);
+	bebob_free(card->private_data);
 }
 
 static const struct snd_bebob_spec *
@@ -176,16 +180,17 @@
 	return strncmp(name, "FW Audiophile Bootloader", 15) != 0;
 }
 
-static int
-bebob_probe(struct fw_unit *unit,
-	    const struct ieee1394_device_id *entry)
+static void
+do_registration(struct work_struct *work)
 {
-	struct snd_card *card;
-	struct snd_bebob *bebob;
-	const struct snd_bebob_spec *spec;
+	struct snd_bebob *bebob =
+			container_of(work, struct snd_bebob, dwork.work);
 	unsigned int card_index;
 	int err;
 
+	if (bebob->registered)
+		return;
+
 	mutex_lock(&devices_mutex);
 
 	for (card_index = 0; card_index < SNDRV_CARDS; card_index++) {
@@ -193,64 +198,39 @@
 			break;
 	}
 	if (card_index >= SNDRV_CARDS) {
-		err = -ENOENT;
-		goto end;
+		mutex_unlock(&devices_mutex);
+		return;
 	}
 
-	if ((entry->vendor_id == VEN_FOCUSRITE) &&
-	    (entry->model_id == MODEL_FOCUSRITE_SAFFIRE_BOTH))
-		spec = get_saffire_spec(unit);
-	else if ((entry->vendor_id == VEN_MAUDIO1) &&
-		 (entry->model_id == MODEL_MAUDIO_AUDIOPHILE_BOTH) &&
-		 !check_audiophile_booted(unit))
-		spec = NULL;
-	else
-		spec = (const struct snd_bebob_spec *)entry->driver_data;
-
-	if (spec == NULL) {
-		if ((entry->vendor_id == VEN_MAUDIO1) ||
-		    (entry->vendor_id == VEN_MAUDIO2))
-			err = snd_bebob_maudio_load_firmware(unit);
-		else
-			err = -ENOSYS;
-		goto end;
+	err = snd_card_new(&bebob->unit->device, index[card_index],
+			   id[card_index], THIS_MODULE, 0, &bebob->card);
+	if (err < 0) {
+		mutex_unlock(&devices_mutex);
+		return;
 	}
 
-	err = snd_card_new(&unit->device, index[card_index], id[card_index],
-			   THIS_MODULE, sizeof(struct snd_bebob), &card);
-	if (err < 0)
-		goto end;
-	bebob = card->private_data;
-	bebob->card_index = card_index;
-	set_bit(card_index, devices_used);
-	card->private_free = bebob_card_free;
-
-	bebob->card = card;
-	bebob->unit = fw_unit_get(unit);
-	bebob->spec = spec;
-	mutex_init(&bebob->mutex);
-	spin_lock_init(&bebob->lock);
-	init_waitqueue_head(&bebob->hwdep_wait);
-
-	err = name_device(bebob, entry->vendor_id);
+	err = name_device(bebob);
 	if (err < 0)
 		goto error;
 
-	if ((entry->vendor_id == VEN_MAUDIO1) &&
-	    (entry->model_id == MODEL_MAUDIO_FW1814))
-		err = snd_bebob_maudio_special_discover(bebob, true);
-	else if ((entry->vendor_id == VEN_MAUDIO1) &&
-		 (entry->model_id == MODEL_MAUDIO_PROJECTMIX))
-		err = snd_bebob_maudio_special_discover(bebob, false);
-	else
+	if (bebob->spec == &maudio_special_spec) {
+		if (bebob->entry->model_id == MODEL_MAUDIO_FW1814)
+			err = snd_bebob_maudio_special_discover(bebob, true);
+		else
+			err = snd_bebob_maudio_special_discover(bebob, false);
+	} else {
 		err = snd_bebob_stream_discover(bebob);
+	}
+	if (err < 0)
+		goto error;
+
+	err = snd_bebob_stream_init_duplex(bebob);
 	if (err < 0)
 		goto error;
 
 	snd_bebob_proc_init(bebob);
 
-	if ((bebob->midi_input_ports > 0) ||
-	    (bebob->midi_output_ports > 0)) {
+	if (bebob->midi_input_ports > 0 || bebob->midi_output_ports > 0) {
 		err = snd_bebob_create_midi_devices(bebob);
 		if (err < 0)
 			goto error;
@@ -264,16 +244,75 @@
 	if (err < 0)
 		goto error;
 
-	err = snd_bebob_stream_init_duplex(bebob);
+	err = snd_card_register(bebob->card);
 	if (err < 0)
 		goto error;
 
-	if (!bebob->maudio_special_quirk) {
-		err = snd_card_register(card);
-		if (err < 0) {
-			snd_bebob_stream_destroy_duplex(bebob);
-			goto error;
-		}
+	/* bebob_card_free() clears devices_used by bebob->card_index. */
+	bebob->card_index = card_index;
+	set_bit(card_index, devices_used);
+	mutex_unlock(&devices_mutex);
+
+	/* Once registered, bebob_card_free() also frees the bebob instance. */
+	bebob->card->private_free = bebob_card_free;
+	bebob->card->private_data = bebob;
+	bebob->registered = true;
+
+	return;
+error:
+	/* Still unregistered: a later bus reset schedules this work again. */
+	mutex_unlock(&devices_mutex);
+	snd_bebob_stream_destroy_duplex(bebob);
+	snd_card_free(bebob->card);
+	dev_info(&bebob->unit->device,
+		 "Sound card registration failed: %d\n", err);
+}
+
+static int
+bebob_probe(struct fw_unit *unit, const struct ieee1394_device_id *entry)
+{
+	struct snd_bebob *bebob;
+	const struct snd_bebob_spec *spec;
+
+	if (entry->vendor_id == VEN_FOCUSRITE &&
+	    entry->model_id == MODEL_FOCUSRITE_SAFFIRE_BOTH)
+		spec = get_saffire_spec(unit);
+	else if (entry->vendor_id == VEN_MAUDIO1 &&
+		 entry->model_id == MODEL_MAUDIO_AUDIOPHILE_BOTH &&
+		 !check_audiophile_booted(unit))
+		spec = NULL;
+	else
+		spec = (const struct snd_bebob_spec *)entry->driver_data;
+
+	if (spec == NULL) {
+		if (entry->vendor_id == VEN_MAUDIO1 ||
+		    entry->vendor_id == VEN_MAUDIO2)
+			return snd_bebob_maudio_load_firmware(unit);
+		else
+			return -ENODEV;
+	}
+
+	/* Allocate this independent of sound card instance. */
+	bebob = kzalloc(sizeof(struct snd_bebob), GFP_KERNEL);
+	if (bebob == NULL)
+		return -ENOMEM;
+
+	bebob->unit = fw_unit_get(unit);
+	bebob->entry = entry;
+	bebob->spec = spec;
+	dev_set_drvdata(&unit->device, bebob);
+
+	mutex_init(&bebob->mutex);
+	spin_lock_init(&bebob->lock);
+	init_waitqueue_head(&bebob->hwdep_wait);
+
+	/* Allocate and register this sound card later. */
+	INIT_DEFERRABLE_WORK(&bebob->dwork, do_registration);
+
+	if (entry->vendor_id != VEN_MAUDIO1 ||
+	    (entry->model_id != MODEL_MAUDIO_FW1814 &&
+	     entry->model_id != MODEL_MAUDIO_PROJECTMIX)) {
+		snd_fw_schedule_registration(unit, &bebob->dwork);
 	} else {
 		/*
 		 * This is a workaround. This bus reset seems to have an effect
@@ -285,19 +324,11 @@
 		 * signals from dbus and starts I/Os. To avoid I/Os till the
 		 * future bus reset, registration is done in next update().
 		 */
-		bebob->deferred_registration = true;
 		fw_schedule_bus_reset(fw_parent_device(bebob->unit)->card,
 				      false, true);
 	}
 
-	dev_set_drvdata(&unit->device, bebob);
-end:
-	mutex_unlock(&devices_mutex);
-	return err;
-error:
-	mutex_unlock(&devices_mutex);
-	snd_card_free(card);
-	return err;
+	return 0;
 }
 
 /*
@@ -324,15 +355,11 @@
 	if (bebob == NULL)
 		return;
 
-	fcp_bus_reset(bebob->unit);
-
-	if (bebob->deferred_registration) {
-		if (snd_card_register(bebob->card) < 0) {
-			snd_bebob_stream_destroy_duplex(bebob);
-			snd_card_free(bebob->card);
-		}
-		bebob->deferred_registration = false;
-	}
+	/* Postpone a workqueue for deferred registration. */
+	if (!bebob->registered)
+		snd_fw_schedule_registration(unit, &bebob->dwork);
+	else
+		fcp_bus_reset(bebob->unit);
 }
 
 static void bebob_remove(struct fw_unit *unit)
@@ -342,8 +369,20 @@
 	if (bebob == NULL)
 		return;
 
-	/* No need to wait for releasing card object in this context. */
-	snd_card_free_when_closed(bebob->card);
+	/*
+	 * Confirm to stop the work for registration before the sound card is
+	 * going to be released. The work is not scheduled again because bus
+	 * reset handler is not called anymore.
+	 */
+	cancel_delayed_work_sync(&bebob->dwork);
+
+	if (bebob->registered) {
+		/* No need to wait for releasing card object in this context. */
+		snd_card_free_when_closed(bebob->card);
+	} else {
+		/* Don't forget this case. */
+		bebob_free(bebob);
+	}
 }
 
 static const struct snd_bebob_rate_spec normal_rate_spec = {
diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h
index b50bb33d..e7f1bb9 100644
--- a/sound/firewire/bebob/bebob.h
+++ b/sound/firewire/bebob/bebob.h
@@ -83,6 +83,10 @@
 	struct mutex mutex;
 	spinlock_t lock;
 
+	bool registered;
+	struct delayed_work dwork;
+
+	const struct ieee1394_device_id *entry;
 	const struct snd_bebob_spec *spec;
 
 	unsigned int midi_input_ports;
@@ -90,7 +94,6 @@
 
 	bool connected;
 
-	struct amdtp_stream *master;
 	struct amdtp_stream tx_stream;
 	struct amdtp_stream rx_stream;
 	struct cmp_connection out_conn;
@@ -111,7 +114,6 @@
 
 	/* for M-Audio special devices */
 	void *maudio_special_quirk;
-	bool deferred_registration;
 
 	/* For BeBoB version quirk. */
 	unsigned int version;
diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index 77cbb02..4d3034a 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -484,30 +484,6 @@
 }
 
 static int
-get_sync_mode(struct snd_bebob *bebob, enum cip_flags *sync_mode)
-{
-	enum snd_bebob_clock_type src;
-	int err;
-
-	err = snd_bebob_stream_get_clock_src(bebob, &src);
-	if (err < 0)
-		return err;
-
-	switch (src) {
-	case SND_BEBOB_CLOCK_TYPE_INTERNAL:
-	case SND_BEBOB_CLOCK_TYPE_EXTERNAL:
-		*sync_mode = CIP_SYNC_TO_DEVICE;
-		break;
-	default:
-	case SND_BEBOB_CLOCK_TYPE_SYT:
-		*sync_mode = 0;
-		break;
-	}
-
-	return 0;
-}
-
-static int
 start_stream(struct snd_bebob *bebob, struct amdtp_stream *stream,
 	     unsigned int rate)
 {
@@ -550,8 +526,6 @@
 		goto end;
 	}
 
-	bebob->tx_stream.flags |= CIP_SKIP_INIT_DBC_CHECK;
-
 	/*
 	 * BeBoB v3 transfers packets with these qurks:
 	 *  - In the beginning of streaming, the value of dbc is incremented
@@ -584,8 +558,6 @@
 int snd_bebob_stream_start_duplex(struct snd_bebob *bebob, unsigned int rate)
 {
 	const struct snd_bebob_rate_spec *rate_spec = bebob->spec->rate;
-	struct amdtp_stream *master, *slave;
-	enum cip_flags sync_mode;
 	unsigned int curr_rate;
 	int err = 0;
 
@@ -593,22 +565,11 @@
 	if (bebob->substreams_counter == 0)
 		goto end;
 
-	err = get_sync_mode(bebob, &sync_mode);
-	if (err < 0)
-		goto end;
-	if (sync_mode == CIP_SYNC_TO_DEVICE) {
-		master = &bebob->tx_stream;
-		slave  = &bebob->rx_stream;
-	} else {
-		master = &bebob->rx_stream;
-		slave  = &bebob->tx_stream;
-	}
-
 	/*
 	 * Considering JACK/FFADO streaming:
 	 * TODO: This can be removed hwdep functionality becomes popular.
 	 */
-	err = check_connection_used_by_others(bebob, master);
+	err = check_connection_used_by_others(bebob, &bebob->rx_stream);
 	if (err < 0)
 		goto end;
 
@@ -618,11 +579,12 @@
 	 * At bus reset, connections should not be broken here. So streams need
 	 * to be re-started. This is a reason to use SKIP_INIT_DBC_CHECK flag.
 	 */
-	if (amdtp_streaming_error(master))
-		amdtp_stream_stop(master);
-	if (amdtp_streaming_error(slave))
-		amdtp_stream_stop(slave);
-	if (!amdtp_stream_running(master) && !amdtp_stream_running(slave))
+	if (amdtp_streaming_error(&bebob->rx_stream))
+		amdtp_stream_stop(&bebob->rx_stream);
+	if (amdtp_streaming_error(&bebob->tx_stream))
+		amdtp_stream_stop(&bebob->tx_stream);
+	if (!amdtp_stream_running(&bebob->rx_stream) &&
+	    !amdtp_stream_running(&bebob->tx_stream))
 		break_both_connections(bebob);
 
 	/* stop streams if rate is different */
@@ -635,16 +597,13 @@
 	if (rate == 0)
 		rate = curr_rate;
 	if (rate != curr_rate) {
-		amdtp_stream_stop(master);
-		amdtp_stream_stop(slave);
+		amdtp_stream_stop(&bebob->rx_stream);
+		amdtp_stream_stop(&bebob->tx_stream);
 		break_both_connections(bebob);
 	}
 
 	/* master should be always running */
-	if (!amdtp_stream_running(master)) {
-		amdtp_stream_set_sync(sync_mode, master, slave);
-		bebob->master = master;
-
+	if (!amdtp_stream_running(&bebob->rx_stream)) {
 		/*
 		 * NOTE:
 		 * If establishing connections at first, Yamaha GO46
@@ -666,7 +625,7 @@
 		if (err < 0)
 			goto end;
 
-		err = start_stream(bebob, master, rate);
+		err = start_stream(bebob, &bebob->rx_stream, rate);
 		if (err < 0) {
 			dev_err(&bebob->unit->device,
 				"fail to run AMDTP master stream:%d\n", err);
@@ -685,15 +644,16 @@
 				dev_err(&bebob->unit->device,
 					"fail to ensure sampling rate: %d\n",
 					err);
-				amdtp_stream_stop(master);
+				amdtp_stream_stop(&bebob->rx_stream);
 				break_both_connections(bebob);
 				goto end;
 			}
 		}
 
 		/* wait first callback */
-		if (!amdtp_stream_wait_callback(master, CALLBACK_TIMEOUT)) {
-			amdtp_stream_stop(master);
+		if (!amdtp_stream_wait_callback(&bebob->rx_stream,
+						CALLBACK_TIMEOUT)) {
+			amdtp_stream_stop(&bebob->rx_stream);
 			break_both_connections(bebob);
 			err = -ETIMEDOUT;
 			goto end;
@@ -701,20 +661,21 @@
 	}
 
 	/* start slave if needed */
-	if (!amdtp_stream_running(slave)) {
-		err = start_stream(bebob, slave, rate);
+	if (!amdtp_stream_running(&bebob->tx_stream)) {
+		err = start_stream(bebob, &bebob->tx_stream, rate);
 		if (err < 0) {
 			dev_err(&bebob->unit->device,
 				"fail to run AMDTP slave stream:%d\n", err);
-			amdtp_stream_stop(master);
+			amdtp_stream_stop(&bebob->rx_stream);
 			break_both_connections(bebob);
 			goto end;
 		}
 
 		/* wait first callback */
-		if (!amdtp_stream_wait_callback(slave, CALLBACK_TIMEOUT)) {
-			amdtp_stream_stop(slave);
-			amdtp_stream_stop(master);
+		if (!amdtp_stream_wait_callback(&bebob->tx_stream,
+						CALLBACK_TIMEOUT)) {
+			amdtp_stream_stop(&bebob->tx_stream);
+			amdtp_stream_stop(&bebob->rx_stream);
 			break_both_connections(bebob);
 			err = -ETIMEDOUT;
 		}
@@ -725,22 +686,12 @@
 
 void snd_bebob_stream_stop_duplex(struct snd_bebob *bebob)
 {
-	struct amdtp_stream *master, *slave;
-
-	if (bebob->master == &bebob->rx_stream) {
-		slave  = &bebob->tx_stream;
-		master = &bebob->rx_stream;
-	} else {
-		slave  = &bebob->rx_stream;
-		master = &bebob->tx_stream;
-	}
-
 	if (bebob->substreams_counter == 0) {
-		amdtp_stream_pcm_abort(master);
-		amdtp_stream_stop(master);
+		amdtp_stream_pcm_abort(&bebob->rx_stream);
+		amdtp_stream_stop(&bebob->rx_stream);
 
-		amdtp_stream_pcm_abort(slave);
-		amdtp_stream_stop(slave);
+		amdtp_stream_pcm_abort(&bebob->tx_stream);
+		amdtp_stream_stop(&bebob->tx_stream);
 
 		break_both_connections(bebob);
 	}
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 8b64aef..25e9f77 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -20,8 +20,6 @@
 #define WEISS_CATEGORY_ID	0x00
 #define LOUD_CATEGORY_ID	0x10
 
-#define PROBE_DELAY_MS		(2 * MSEC_PER_SEC)
-
 /*
  * Some models support several isochronous channels, while these streams are not
  * always available. In this case, add the model name to this list.
@@ -201,6 +199,10 @@
 
 	dice_card_strings(dice);
 
+	err = snd_dice_stream_init_duplex(dice);
+	if (err < 0)
+		goto error;
+
 	snd_dice_create_proc(dice);
 
 	err = snd_dice_create_pcm(dice);
@@ -229,28 +231,14 @@
 
 	return;
 error:
+	/* Tear down the duplex streams exactly once. */
+	snd_dice_stream_destroy_duplex(dice);
 	snd_dice_transaction_destroy(dice);
 	snd_card_free(dice->card);
 	dev_info(&dice->unit->device,
 		 "Sound card registration failed: %d\n", err);
 }
 
-static void schedule_registration(struct snd_dice *dice)
-{
-	struct fw_card *fw_card = fw_parent_device(dice->unit)->card;
-	u64 now, delay;
-
-	now = get_jiffies_64();
-	delay = fw_card->reset_jiffies + msecs_to_jiffies(PROBE_DELAY_MS);
-
-	if (time_after64(delay, now))
-		delay -= now;
-	else
-		delay = 0;
-
-	mod_delayed_work(system_wq, &dice->dwork, delay);
-}
-
 static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
 {
 	struct snd_dice *dice;
@@ -273,15 +261,9 @@
 	init_completion(&dice->clock_accepted);
 	init_waitqueue_head(&dice->hwdep_wait);
 
-	err = snd_dice_stream_init_duplex(dice);
-	if (err < 0) {
-		dice_free(dice);
-		return err;
-	}
-
 	/* Allocate and register this sound card later. */
 	INIT_DEFERRABLE_WORK(&dice->dwork, do_registration);
-	schedule_registration(dice);
+	snd_fw_schedule_registration(unit, &dice->dwork);
 
 	return 0;
 }
@@ -312,7 +294,7 @@
 
 	/* Postpone a workqueue for deferred registration. */
 	if (!dice->registered)
-		schedule_registration(dice);
+		snd_fw_schedule_registration(unit, &dice->dwork);
 
 	/* The handler address register becomes initialized. */
 	snd_dice_transaction_reinit(dice);
@@ -335,6 +317,13 @@
 		.match_flags = IEEE1394_MATCH_VERSION,
 		.version     = DICE_INTERFACE,
 	},
+	/* M-Audio Profire 610/2626 has a different value in version field. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_SPECIFIER_ID,
+		.vendor_id	= 0x000d6c,
+		.specifier_id	= 0x000d6c,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(ieee1394, dice_id_table);
diff --git a/sound/firewire/digi00x/amdtp-dot.c b/sound/firewire/digi00x/amdtp-dot.c
index 0ac92ab..b3cffd0 100644
--- a/sound/firewire/digi00x/amdtp-dot.c
+++ b/sound/firewire/digi00x/amdtp-dot.c
@@ -421,7 +421,7 @@
 
 	/* Use different mode between incoming/outgoing. */
 	if (dir == AMDTP_IN_STREAM) {
-		flags = CIP_NONBLOCKING | CIP_SKIP_INIT_DBC_CHECK;
+		flags = CIP_NONBLOCKING;
 		process_data_blocks = process_tx_data_blocks;
 	} else {
 		flags = CIP_BLOCKING;
diff --git a/sound/firewire/digi00x/digi00x-transaction.c b/sound/firewire/digi00x/digi00x-transaction.c
index 554324d..735d356 100644
--- a/sound/firewire/digi00x/digi00x-transaction.c
+++ b/sound/firewire/digi00x/digi00x-transaction.c
@@ -126,12 +126,17 @@
 	return err;
 error:
 	fw_core_remove_address_handler(&dg00x->async_handler);
-	dg00x->async_handler.address_callback = NULL;
+	dg00x->async_handler.callback_data = NULL;
 	return err;
 }
 
 void snd_dg00x_transaction_unregister(struct snd_dg00x *dg00x)
 {
+	if (dg00x->async_handler.callback_data == NULL)
+		return;
+
 	snd_fw_async_midi_port_destroy(&dg00x->out_control);
 	fw_core_remove_address_handler(&dg00x->async_handler);
+
+	dg00x->async_handler.callback_data = NULL;
 }
diff --git a/sound/firewire/digi00x/digi00x.c b/sound/firewire/digi00x/digi00x.c
index 1f33b7a..cc4776c 100644
--- a/sound/firewire/digi00x/digi00x.c
+++ b/sound/firewire/digi00x/digi00x.c
@@ -40,10 +40,8 @@
 	return 0;
 }
 
-static void dg00x_card_free(struct snd_card *card)
+static void dg00x_free(struct snd_dg00x *dg00x)
 {
-	struct snd_dg00x *dg00x = card->private_data;
-
 	snd_dg00x_stream_destroy_duplex(dg00x);
 	snd_dg00x_transaction_unregister(dg00x);
 
@@ -52,28 +50,24 @@
 	mutex_destroy(&dg00x->mutex);
+	kfree(dg00x);	/* kzalloc()'ed in snd_dg00x_probe() */
 }
 
-static int snd_dg00x_probe(struct fw_unit *unit,
-			   const struct ieee1394_device_id *entry)
+static void dg00x_card_free(struct snd_card *card)
 {
-	struct snd_card *card;
-	struct snd_dg00x *dg00x;
+	dg00x_free(card->private_data);
+}
+
+static void do_registration(struct work_struct *work)
+{
+	struct snd_dg00x *dg00x =
+			container_of(work, struct snd_dg00x, dwork.work);
 	int err;
 
-	/* create card */
-	err = snd_card_new(&unit->device, -1, NULL, THIS_MODULE,
-			   sizeof(struct snd_dg00x), &card);
+	if (dg00x->registered)
+		return;
+	err = snd_card_new(&dg00x->unit->device, -1, NULL, THIS_MODULE, 0,
+			   &dg00x->card);
 	if (err < 0)
-		return err;
-	card->private_free = dg00x_card_free;
-
-	/* initialize myself */
-	dg00x = card->private_data;
-	dg00x->card = card;
-	dg00x->unit = fw_unit_get(unit);
-
-	mutex_init(&dg00x->mutex);
-	spin_lock_init(&dg00x->lock);
-	init_waitqueue_head(&dg00x->hwdep_wait);
+		return;
 
 	err = name_card(dg00x);
 	if (err < 0)
@@ -101,35 +95,86 @@
 	if (err < 0)
 		goto error;
 
-	err = snd_card_register(card);
+	err = snd_card_register(dg00x->card);
 	if (err < 0)
 		goto error;
 
+	dg00x->card->private_free = dg00x_card_free;
+	dg00x->card->private_data = dg00x;
+	dg00x->registered = true;
+
+	return;
+error:
+	snd_dg00x_transaction_unregister(dg00x);
+	snd_dg00x_stream_destroy_duplex(dg00x);
+	snd_card_free(dg00x->card);
+	dev_info(&dg00x->unit->device,
+		 "Sound card registration failed: %d\n", err);
+}
+
+static int snd_dg00x_probe(struct fw_unit *unit,
+			   const struct ieee1394_device_id *entry)
+{
+	struct snd_dg00x *dg00x;
+
+	/* Allocate this independent of sound card instance. */
+	dg00x = kzalloc(sizeof(struct snd_dg00x), GFP_KERNEL);
+	if (dg00x == NULL)
+		return -ENOMEM;
+
+	dg00x->unit = fw_unit_get(unit);
 	dev_set_drvdata(&unit->device, dg00x);
 
-	return err;
-error:
-	snd_card_free(card);
-	return err;
+	mutex_init(&dg00x->mutex);
+	spin_lock_init(&dg00x->lock);
+	init_waitqueue_head(&dg00x->hwdep_wait);
+
+	/* Allocate and register this sound card later. */
+	INIT_DEFERRABLE_WORK(&dg00x->dwork, do_registration);
+	snd_fw_schedule_registration(unit, &dg00x->dwork);
+
+	return 0;
 }
 
 static void snd_dg00x_update(struct fw_unit *unit)
 {
 	struct snd_dg00x *dg00x = dev_get_drvdata(&unit->device);
 
+	/* Postpone a workqueue for deferred registration. */
+	if (!dg00x->registered)
+		snd_fw_schedule_registration(unit, &dg00x->dwork);
+
 	snd_dg00x_transaction_reregister(dg00x);
 
-	mutex_lock(&dg00x->mutex);
-	snd_dg00x_stream_update_duplex(dg00x);
-	mutex_unlock(&dg00x->mutex);
+	/*
+	 * After registration, userspace can start packet streaming, then this
+	 * code block works fine.
+	 */
+	if (dg00x->registered) {
+		mutex_lock(&dg00x->mutex);
+		snd_dg00x_stream_update_duplex(dg00x);
+		mutex_unlock(&dg00x->mutex);
+	}
 }
 
 static void snd_dg00x_remove(struct fw_unit *unit)
 {
 	struct snd_dg00x *dg00x = dev_get_drvdata(&unit->device);
 
-	/* No need to wait for releasing card object in this context. */
-	snd_card_free_when_closed(dg00x->card);
+	/*
+	 * Make sure the registration work has stopped before the sound card
+	 * is released. The work is not scheduled again because the bus reset
+	 * handler is not called anymore.
+	 */
+	cancel_delayed_work_sync(&dg00x->dwork);
+
+	if (dg00x->registered) {
+		/* No need to wait for releasing card object in this context. */
+		snd_card_free_when_closed(dg00x->card);
+	} else {
+		/* No sound card was registered; release the instance here. */
+		dg00x_free(dg00x);
+	}
 }
 
 static const struct ieee1394_device_id snd_dg00x_id_table[] = {
diff --git a/sound/firewire/digi00x/digi00x.h b/sound/firewire/digi00x/digi00x.h
index 907e739..2cd465c 100644
--- a/sound/firewire/digi00x/digi00x.h
+++ b/sound/firewire/digi00x/digi00x.h
@@ -37,6 +37,9 @@
 	struct mutex mutex;
 	spinlock_t lock;
 
+	bool registered;
+	struct delayed_work dwork;
+
 	struct amdtp_stream tx_stream;
 	struct fw_iso_resources tx_resources;
 
diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c
index 8f27b67..71a0613 100644
--- a/sound/firewire/fireworks/fireworks.c
+++ b/sound/firewire/fireworks/fireworks.c
@@ -168,11 +168,34 @@
 	       sizeof(struct snd_efw_phys_grp) * hwinfo->phys_in_grp_count);
 	memcpy(&efw->phys_out_grps, hwinfo->phys_out_grps,
 	       sizeof(struct snd_efw_phys_grp) * hwinfo->phys_out_grp_count);
+
+	/* AudioFire8 (since 2009) and AudioFirePre8 */
+	if (hwinfo->type == MODEL_ECHO_AUDIOFIRE_9)
+		efw->is_af9 = true;
+	/* These models uses the same firmware. */
+	if (hwinfo->type == MODEL_ECHO_AUDIOFIRE_2 ||
+	    hwinfo->type == MODEL_ECHO_AUDIOFIRE_4 ||
+	    hwinfo->type == MODEL_ECHO_AUDIOFIRE_9 ||
+	    hwinfo->type == MODEL_GIBSON_RIP ||
+	    hwinfo->type == MODEL_GIBSON_GOLDTOP)
+		efw->is_fireworks3 = true;
 end:
 	kfree(hwinfo);
 	return err;
 }
 
+static void efw_free(struct snd_efw *efw)
+{
+	snd_efw_stream_destroy_duplex(efw);
+	snd_efw_transaction_remove_instance(efw);
+	fw_unit_put(efw->unit);
+
+	kfree(efw->resp_buf);
+
+	mutex_destroy(&efw->mutex);
+	kfree(efw);
+}
+
 /*
  * This module releases the FireWire unit data after all ALSA character devices
  * are released by applications. This is for releasing stream data or finishing
@@ -184,28 +207,24 @@
 {
 	struct snd_efw *efw = card->private_data;
 
-	snd_efw_stream_destroy_duplex(efw);
-	snd_efw_transaction_remove_instance(efw);
-	fw_unit_put(efw->unit);
-
-	kfree(efw->resp_buf);
-
 	if (efw->card_index >= 0) {
 		mutex_lock(&devices_mutex);
 		clear_bit(efw->card_index, devices_used);
 		mutex_unlock(&devices_mutex);
 	}
 
-	mutex_destroy(&efw->mutex);
+	efw_free(card->private_data);
 }
 
-static int
-efw_probe(struct fw_unit *unit,
-	  const struct ieee1394_device_id *entry)
+static void
+do_registration(struct work_struct *work)
 {
-	struct snd_card *card;
-	struct snd_efw *efw;
-	int card_index, err;
+	struct snd_efw *efw = container_of(work, struct snd_efw, dwork.work);
+	unsigned int card_index;
+	int err;
+
+	if (efw->registered)
+		return;
 
 	mutex_lock(&devices_mutex);
 
@@ -215,24 +234,16 @@
 			break;
 	}
 	if (card_index >= SNDRV_CARDS) {
-		err = -ENOENT;
-		goto end;
+		mutex_unlock(&devices_mutex);
+		return;
 	}
 
-	err = snd_card_new(&unit->device, index[card_index], id[card_index],
-			   THIS_MODULE, sizeof(struct snd_efw), &card);
-	if (err < 0)
-		goto end;
-	efw = card->private_data;
-	efw->card_index = card_index;
-	set_bit(card_index, devices_used);
-	card->private_free = efw_card_free;
-
-	efw->card = card;
-	efw->unit = fw_unit_get(unit);
-	mutex_init(&efw->mutex);
-	spin_lock_init(&efw->lock);
-	init_waitqueue_head(&efw->hwdep_wait);
+	err = snd_card_new(&efw->unit->device, index[card_index],
+			   id[card_index], THIS_MODULE, 0, &efw->card);
+	if (err < 0) {
+		mutex_unlock(&devices_mutex);
+		return;
+	}
 
 	/* prepare response buffer */
 	snd_efw_resp_buf_size = clamp(snd_efw_resp_buf_size,
@@ -248,16 +259,10 @@
 	err = get_hardware_info(efw);
 	if (err < 0)
 		goto error;
-	/* AudioFire8 (since 2009) and AudioFirePre8 */
-	if (entry->model_id == MODEL_ECHO_AUDIOFIRE_9)
-		efw->is_af9 = true;
-	/* These models uses the same firmware. */
-	if (entry->model_id == MODEL_ECHO_AUDIOFIRE_2 ||
-	    entry->model_id == MODEL_ECHO_AUDIOFIRE_4 ||
-	    entry->model_id == MODEL_ECHO_AUDIOFIRE_9 ||
-	    entry->model_id == MODEL_GIBSON_RIP ||
-	    entry->model_id == MODEL_GIBSON_GOLDTOP)
-		efw->is_fireworks3 = true;
+
+	err = snd_efw_stream_init_duplex(efw);
+	if (err < 0)
+		goto error;
 
 	snd_efw_proc_init(efw);
 
@@ -275,44 +280,93 @@
 	if (err < 0)
 		goto error;
 
-	err = snd_efw_stream_init_duplex(efw);
+	err = snd_card_register(efw->card);
 	if (err < 0)
 		goto error;
 
-	err = snd_card_register(card);
-	if (err < 0) {
-		snd_efw_stream_destroy_duplex(efw);
-		goto error;
-	}
+	/* efw_card_free() clears devices_used by efw->card_index. */
+	efw->card_index = card_index;
+	set_bit(card_index, devices_used);
+	mutex_unlock(&devices_mutex);
 
-	dev_set_drvdata(&unit->device, efw);
-end:
-	mutex_unlock(&devices_mutex);
-	return err;
+	/* Once registered, efw_card_free() also frees the efw instance. */
+	efw->card->private_free = efw_card_free;
+	efw->card->private_data = efw;
+	efw->registered = true;
+
+	return;
 error:
-	snd_efw_transaction_remove_instance(efw);
+	/* Still unregistered: a later bus reset schedules this work again. */
 	mutex_unlock(&devices_mutex);
-	snd_card_free(card);
-	return err;
+	snd_efw_transaction_remove_instance(efw);
+	snd_efw_stream_destroy_duplex(efw);
+	snd_card_free(efw->card);
+	dev_info(&efw->unit->device,
+		 "Sound card registration failed: %d\n", err);
+}
+
+static int
+efw_probe(struct fw_unit *unit, const struct ieee1394_device_id *entry)
+{
+	struct snd_efw *efw;
+
+	efw = kzalloc(sizeof(struct snd_efw), GFP_KERNEL);
+	if (efw == NULL)
+		return -ENOMEM;
+
+	efw->unit = fw_unit_get(unit);
+	dev_set_drvdata(&unit->device, efw);
+
+	mutex_init(&efw->mutex);
+	spin_lock_init(&efw->lock);
+	init_waitqueue_head(&efw->hwdep_wait);
+
+	/* Allocate and register this sound card later. */
+	INIT_DEFERRABLE_WORK(&efw->dwork, do_registration);
+	snd_fw_schedule_registration(unit, &efw->dwork);
+
+	return 0;
 }
 
 static void efw_update(struct fw_unit *unit)
 {
 	struct snd_efw *efw = dev_get_drvdata(&unit->device);
 
+	/* Postpone a workqueue for deferred registration. */
+	if (!efw->registered)
+		snd_fw_schedule_registration(unit, &efw->dwork);
+
 	snd_efw_transaction_bus_reset(efw->unit);
 
-	mutex_lock(&efw->mutex);
-	snd_efw_stream_update_duplex(efw);
-	mutex_unlock(&efw->mutex);
+	/*
+	 * After registration, userspace can start packet streaming, then this
+	 * code block works fine.
+	 */
+	if (efw->registered) {
+		mutex_lock(&efw->mutex);
+		snd_efw_stream_update_duplex(efw);
+		mutex_unlock(&efw->mutex);
+	}
 }
 
 static void efw_remove(struct fw_unit *unit)
 {
 	struct snd_efw *efw = dev_get_drvdata(&unit->device);
 
-	/* No need to wait for releasing card object in this context. */
-	snd_card_free_when_closed(efw->card);
+	/*
+	 * Make sure the registration work has stopped before the sound card
+	 * is released. The work is not scheduled again because the bus reset
+	 * handler is not called anymore.
+	 */
+	cancel_delayed_work_sync(&efw->dwork);
+
+	if (efw->registered) {
+		/* No need to wait for releasing card object in this context. */
+		snd_card_free_when_closed(efw->card);
+	} else {
+		/* No sound card was registered; release the instance here. */
+		efw_free(efw);
+	}
 }
 
 static const struct ieee1394_device_id efw_id_table[] = {
diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h
index 96c4e0c..03ed352 100644
--- a/sound/firewire/fireworks/fireworks.h
+++ b/sound/firewire/fireworks/fireworks.h
@@ -65,6 +65,9 @@
 	struct mutex mutex;
 	spinlock_t lock;
 
+	bool registered;
+	struct delayed_work dwork;
+
 	/* for transaction */
 	u32 seqnum;
 	bool resp_addr_changable;
@@ -81,7 +84,6 @@
 	unsigned int pcm_capture_channels[SND_EFW_MULTIPLIER_MODES];
 	unsigned int pcm_playback_channels[SND_EFW_MULTIPLIER_MODES];
 
-	struct amdtp_stream *master;
 	struct amdtp_stream tx_stream;
 	struct amdtp_stream rx_stream;
 	struct cmp_connection out_conn;
diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c
index 425db8d..ee47924 100644
--- a/sound/firewire/fireworks/fireworks_stream.c
+++ b/sound/firewire/fireworks/fireworks_stream.c
@@ -121,23 +121,6 @@
 }
 
 static int
-get_sync_mode(struct snd_efw *efw, enum cip_flags *sync_mode)
-{
-	enum snd_efw_clock_source clock_source;
-	int err;
-
-	err = snd_efw_command_get_clock_source(efw, &clock_source);
-	if (err < 0)
-		return err;
-
-	if (clock_source == SND_EFW_CLOCK_SOURCE_SYTMATCH)
-		return -ENOSYS;
-
-	*sync_mode = CIP_SYNC_TO_DEVICE;
-	return 0;
-}
-
-static int
 check_connection_used_by_others(struct snd_efw *efw, struct amdtp_stream *s)
 {
 	struct cmp_connection *conn;
@@ -208,9 +191,6 @@
 
 int snd_efw_stream_start_duplex(struct snd_efw *efw, unsigned int rate)
 {
-	struct amdtp_stream *master, *slave;
-	unsigned int slave_substreams;
-	enum cip_flags sync_mode;
 	unsigned int curr_rate;
 	int err = 0;
 
@@ -218,32 +198,19 @@
 	if (efw->playback_substreams == 0 && efw->capture_substreams  == 0)
 		goto end;
 
-	err = get_sync_mode(efw, &sync_mode);
-	if (err < 0)
-		goto end;
-	if (sync_mode == CIP_SYNC_TO_DEVICE) {
-		master = &efw->tx_stream;
-		slave  = &efw->rx_stream;
-		slave_substreams  = efw->playback_substreams;
-	} else {
-		master = &efw->rx_stream;
-		slave  = &efw->tx_stream;
-		slave_substreams = efw->capture_substreams;
-	}
-
 	/*
 	 * Considering JACK/FFADO streaming:
 	 * TODO: This can be removed hwdep functionality becomes popular.
 	 */
-	err = check_connection_used_by_others(efw, master);
+	err = check_connection_used_by_others(efw, &efw->rx_stream);
 	if (err < 0)
 		goto end;
 
 	/* packet queueing error */
-	if (amdtp_streaming_error(slave))
-		stop_stream(efw, slave);
-	if (amdtp_streaming_error(master))
-		stop_stream(efw, master);
+	if (amdtp_streaming_error(&efw->tx_stream))
+		stop_stream(efw, &efw->tx_stream);
+	if (amdtp_streaming_error(&efw->rx_stream))
+		stop_stream(efw, &efw->rx_stream);
 
 	/* stop streams if rate is different */
 	err = snd_efw_command_get_sampling_rate(efw, &curr_rate);
@@ -252,20 +219,17 @@
 	if (rate == 0)
 		rate = curr_rate;
 	if (rate != curr_rate) {
-		stop_stream(efw, slave);
-		stop_stream(efw, master);
+		stop_stream(efw, &efw->tx_stream);
+		stop_stream(efw, &efw->rx_stream);
 	}
 
 	/* master should be always running */
-	if (!amdtp_stream_running(master)) {
-		amdtp_stream_set_sync(sync_mode, master, slave);
-		efw->master = master;
-
+	if (!amdtp_stream_running(&efw->rx_stream)) {
 		err = snd_efw_command_set_sampling_rate(efw, rate);
 		if (err < 0)
 			goto end;
 
-		err = start_stream(efw, master, rate);
+		err = start_stream(efw, &efw->rx_stream, rate);
 		if (err < 0) {
 			dev_err(&efw->unit->device,
 				"fail to start AMDTP master stream:%d\n", err);
@@ -274,12 +238,13 @@
 	}
 
 	/* start slave if needed */
-	if (slave_substreams > 0 && !amdtp_stream_running(slave)) {
-		err = start_stream(efw, slave, rate);
+	if (efw->capture_substreams > 0 &&
+	    !amdtp_stream_running(&efw->tx_stream)) {
+		err = start_stream(efw, &efw->tx_stream, rate);
 		if (err < 0) {
 			dev_err(&efw->unit->device,
 				"fail to start AMDTP slave stream:%d\n", err);
-			stop_stream(efw, master);
+			stop_stream(efw, &efw->rx_stream);
 		}
 	}
 end:
@@ -288,26 +253,11 @@
 
 void snd_efw_stream_stop_duplex(struct snd_efw *efw)
 {
-	struct amdtp_stream *master, *slave;
-	unsigned int master_substreams, slave_substreams;
+	if (efw->capture_substreams == 0) {
+		stop_stream(efw, &efw->tx_stream);
 
-	if (efw->master == &efw->rx_stream) {
-		slave  = &efw->tx_stream;
-		master = &efw->rx_stream;
-		slave_substreams  = efw->capture_substreams;
-		master_substreams = efw->playback_substreams;
-	} else {
-		slave  = &efw->rx_stream;
-		master = &efw->tx_stream;
-		slave_substreams  = efw->playback_substreams;
-		master_substreams = efw->capture_substreams;
-	}
-
-	if (slave_substreams == 0) {
-		stop_stream(efw, slave);
-
-		if (master_substreams == 0)
-			stop_stream(efw, master);
+		if (efw->playback_substreams == 0)
+			stop_stream(efw, &efw->rx_stream);
 	}
 }
 
diff --git a/sound/firewire/lib.c b/sound/firewire/lib.c
index f80aafa..ca4dfcf 100644
--- a/sound/firewire/lib.c
+++ b/sound/firewire/lib.c
@@ -67,6 +67,38 @@
 }
 EXPORT_SYMBOL(snd_fw_transaction);
 
+#define PROBE_DELAY_MS		(2 * MSEC_PER_SEC)
+
+/**
+ * snd_fw_schedule_registration - schedule work for sound card registration
+ * @unit: an instance for unit on IEEE 1394 bus
+ * @dwork: delayed work with callback function
+ *
+ * This function is not designed for general purposes. When new unit is
+ * connected to IEEE 1394 bus, the bus is under bus-reset state because of
+ * topological change. In this state, units tend to fail both of asynchronous
+ * and isochronous communication. To avoid this problem, this function is used
+ * to postpone sound card registration after the state. The callers must
+ * set up instance of delayed work in advance.
+ */
+void snd_fw_schedule_registration(struct fw_unit *unit,
+				  struct delayed_work *dwork)
+{
+	u64 now, delay;
+
+	now = get_jiffies_64();
+	delay = fw_parent_device(unit)->card->reset_jiffies
+					+ msecs_to_jiffies(PROBE_DELAY_MS);
+
+	if (time_after64(delay, now))
+		delay -= now;
+	else
+		delay = 0;
+
+	mod_delayed_work(system_wq, dwork, delay);
+}
+EXPORT_SYMBOL(snd_fw_schedule_registration);
+
 static void async_midi_port_callback(struct fw_card *card, int rcode,
 				     void *data, size_t length,
 				     void *callback_data)
diff --git a/sound/firewire/lib.h b/sound/firewire/lib.h
index f3f6f84..f676931 100644
--- a/sound/firewire/lib.h
+++ b/sound/firewire/lib.h
@@ -22,6 +22,9 @@
 	return rcode == RCODE_TYPE_ERROR || rcode == RCODE_ADDRESS_ERROR;
 }
 
+void snd_fw_schedule_registration(struct fw_unit *unit,
+				  struct delayed_work *dwork);
+
 struct snd_fw_async_midi_port;
 typedef int (*snd_fw_async_midi_port_fill)(
 				struct snd_rawmidi_substream *substream,
diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c
index 7cb5743..d9361f3 100644
--- a/sound/firewire/oxfw/oxfw-stream.c
+++ b/sound/firewire/oxfw/oxfw-stream.c
@@ -242,8 +242,7 @@
 	 * blocks than IEC 61883-6 defines.
 	 */
 	if (stream == &oxfw->tx_stream) {
-		oxfw->tx_stream.flags |= CIP_SKIP_INIT_DBC_CHECK |
-					 CIP_JUMBO_PAYLOAD;
+		oxfw->tx_stream.flags |= CIP_JUMBO_PAYLOAD;
 		if (oxfw->wrong_dbs)
 			oxfw->tx_stream.flags |= CIP_WRONG_DBS;
 	}
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index abedc22..e629b88 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -118,15 +118,8 @@
 	return err;
 }
 
-/*
- * This module releases the FireWire unit data after all ALSA character devices
- * are released by applications. This is for releasing stream data or finishing
- * transactions safely. Thus at returning from .remove(), this module still keep
- * references for the unit.
- */
-static void oxfw_card_free(struct snd_card *card)
+static void oxfw_free(struct snd_oxfw *oxfw)
 {
-	struct snd_oxfw *oxfw = card->private_data;
 	unsigned int i;
 
 	snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->rx_stream);
@@ -144,6 +137,17 @@
 	mutex_destroy(&oxfw->mutex);
 }
 
+/*
+ * This module releases the FireWire unit data after all ALSA character devices
+ * are released by applications. This is for releasing stream data or finishing
+ * transactions safely. Thus at returning from .remove(), this module still keep
+ * references for the unit.
+ */
+static void oxfw_card_free(struct snd_card *card)
+{
+	oxfw_free(card->private_data);
+}
+
 static int detect_quirks(struct snd_oxfw *oxfw)
 {
 	struct fw_device *fw_dev = fw_parent_device(oxfw->unit);
@@ -205,33 +209,18 @@
 	return 0;
 }
 
-static int oxfw_probe(struct fw_unit *unit,
-		      const struct ieee1394_device_id *entry)
+static void do_registration(struct work_struct *work)
 {
-	struct snd_card *card;
-	struct snd_oxfw *oxfw;
+	struct snd_oxfw *oxfw = container_of(work, struct snd_oxfw, dwork.work);
 	int err;
 
-	if (entry->vendor_id == VENDOR_LOUD && !detect_loud_models(unit))
-		return -ENODEV;
+	if (oxfw->registered)
+		return;
 
-	err = snd_card_new(&unit->device, -1, NULL, THIS_MODULE,
-			   sizeof(*oxfw), &card);
+	err = snd_card_new(&oxfw->unit->device, -1, NULL, THIS_MODULE, 0,
+			   &oxfw->card);
 	if (err < 0)
-		return err;
-
-	card->private_free = oxfw_card_free;
-	oxfw = card->private_data;
-	oxfw->card = card;
-	mutex_init(&oxfw->mutex);
-	oxfw->unit = fw_unit_get(unit);
-	oxfw->entry = entry;
-	spin_lock_init(&oxfw->lock);
-	init_waitqueue_head(&oxfw->hwdep_wait);
-
-	err = snd_oxfw_stream_discover(oxfw);
-	if (err < 0)
-		goto error;
+		return;
 
 	err = name_card(oxfw);
 	if (err < 0)
@@ -241,6 +230,19 @@
 	if (err < 0)
 		goto error;
 
+	err = snd_oxfw_stream_discover(oxfw);
+	if (err < 0)
+		goto error;
+
+	err = snd_oxfw_stream_init_simplex(oxfw, &oxfw->rx_stream);
+	if (err < 0)
+		goto error;
+	if (oxfw->has_output) {
+		err = snd_oxfw_stream_init_simplex(oxfw, &oxfw->tx_stream);
+		if (err < 0)
+			goto error;
+	}
+
 	err = snd_oxfw_create_pcm(oxfw);
 	if (err < 0)
 		goto error;
@@ -255,54 +257,97 @@
 	if (err < 0)
 		goto error;
 
-	err = snd_oxfw_stream_init_simplex(oxfw, &oxfw->rx_stream);
+	err = snd_card_register(oxfw->card);
 	if (err < 0)
 		goto error;
-	if (oxfw->has_output) {
-		err = snd_oxfw_stream_init_simplex(oxfw, &oxfw->tx_stream);
-		if (err < 0)
-			goto error;
-	}
 
-	err = snd_card_register(card);
-	if (err < 0) {
-		snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->rx_stream);
-		if (oxfw->has_output)
-			snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->tx_stream);
-		goto error;
-	}
+	/*
+	 * After registered, oxfw instance can be released corresponding to
+	 * releasing the sound card instance.
+	 */
+	oxfw->card->private_free = oxfw_card_free;
+	oxfw->card->private_data = oxfw;
+	oxfw->registered = true;
+
+	return;
+error:
+	snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->rx_stream);
+	if (oxfw->has_output)
+		snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->tx_stream);
+	snd_card_free(oxfw->card);
+	dev_info(&oxfw->unit->device,
+		 "Sound card registration failed: %d\n", err);
+}
+
+static int oxfw_probe(struct fw_unit *unit,
+		      const struct ieee1394_device_id *entry)
+{
+	struct snd_oxfw *oxfw;
+
+	if (entry->vendor_id == VENDOR_LOUD && !detect_loud_models(unit))
+		return -ENODEV;
+
+	/* Allocate this independent of sound card instance. */
+	oxfw = kzalloc(sizeof(struct snd_oxfw), GFP_KERNEL);
+	if (oxfw == NULL)
+		return -ENOMEM;
+
+	oxfw->entry = entry;
+	oxfw->unit = fw_unit_get(unit);
 	dev_set_drvdata(&unit->device, oxfw);
 
+	mutex_init(&oxfw->mutex);
+	spin_lock_init(&oxfw->lock);
+	init_waitqueue_head(&oxfw->hwdep_wait);
+
+	/* Allocate and register this sound card later. */
+	INIT_DEFERRABLE_WORK(&oxfw->dwork, do_registration);
+	snd_fw_schedule_registration(unit, &oxfw->dwork);
+
 	return 0;
-error:
-	snd_card_free(card);
-	return err;
 }
 
 static void oxfw_bus_reset(struct fw_unit *unit)
 {
 	struct snd_oxfw *oxfw = dev_get_drvdata(&unit->device);
 
+	if (!oxfw->registered)
+		snd_fw_schedule_registration(unit, &oxfw->dwork);
+
 	fcp_bus_reset(oxfw->unit);
 
-	mutex_lock(&oxfw->mutex);
+	if (oxfw->registered) {
+		mutex_lock(&oxfw->mutex);
 
-	snd_oxfw_stream_update_simplex(oxfw, &oxfw->rx_stream);
-	if (oxfw->has_output)
-		snd_oxfw_stream_update_simplex(oxfw, &oxfw->tx_stream);
+		snd_oxfw_stream_update_simplex(oxfw, &oxfw->rx_stream);
+		if (oxfw->has_output)
+			snd_oxfw_stream_update_simplex(oxfw, &oxfw->tx_stream);
 
-	mutex_unlock(&oxfw->mutex);
+		mutex_unlock(&oxfw->mutex);
 
-	if (oxfw->entry->vendor_id == OUI_STANTON)
-		snd_oxfw_scs1x_update(oxfw);
+		if (oxfw->entry->vendor_id == OUI_STANTON)
+			snd_oxfw_scs1x_update(oxfw);
+	}
 }
 
 static void oxfw_remove(struct fw_unit *unit)
 {
 	struct snd_oxfw *oxfw = dev_get_drvdata(&unit->device);
 
-	/* No need to wait for releasing card object in this context. */
-	snd_card_free_when_closed(oxfw->card);
+	/*
+	 * Confirm to stop the work for registration before the sound card is
+	 * going to be released. The work is not scheduled again because bus
+	 * reset handler is not called anymore.
+	 */
+	cancel_delayed_work_sync(&oxfw->dwork);
+
+	if (oxfw->registered) {
+		/* No need to wait for releasing card object in this context. */
+		snd_card_free_when_closed(oxfw->card);
+	} else {
+		/* Don't forget this case. */
+		oxfw_free(oxfw);
+	}
 }
 
 static const struct compat_info griffin_firewave = {
diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h
index 9beecc2..2047dcb 100644
--- a/sound/firewire/oxfw/oxfw.h
+++ b/sound/firewire/oxfw/oxfw.h
@@ -36,10 +36,12 @@
 struct snd_oxfw {
 	struct snd_card *card;
 	struct fw_unit *unit;
-	const struct device_info *device_info;
 	struct mutex mutex;
 	spinlock_t lock;
 
+	bool registered;
+	struct delayed_work dwork;
+
 	bool wrong_dbs;
 	bool has_output;
 	u8 *tx_stream_formats[SND_OXFW_STREAM_FORMAT_ENTRIES];
diff --git a/sound/firewire/tascam/tascam-stream.c b/sound/firewire/tascam/tascam-stream.c
index 0e6dd5c6..4ad3bd7 100644
--- a/sound/firewire/tascam/tascam-stream.c
+++ b/sound/firewire/tascam/tascam-stream.c
@@ -381,19 +381,17 @@
 	if (err < 0)
 		return err;
 	if (curr_rate != rate ||
-	    amdtp_streaming_error(&tscm->tx_stream) ||
-	    amdtp_streaming_error(&tscm->rx_stream)) {
+	    amdtp_streaming_error(&tscm->rx_stream) ||
+	    amdtp_streaming_error(&tscm->tx_stream)) {
 		finish_session(tscm);
 
-		amdtp_stream_stop(&tscm->tx_stream);
 		amdtp_stream_stop(&tscm->rx_stream);
+		amdtp_stream_stop(&tscm->tx_stream);
 
 		release_resources(tscm);
 	}
 
-	if (!amdtp_stream_running(&tscm->tx_stream)) {
-		amdtp_stream_set_sync(CIP_SYNC_TO_DEVICE,
-				      &tscm->tx_stream, &tscm->rx_stream);
+	if (!amdtp_stream_running(&tscm->rx_stream)) {
 		err = keep_resources(tscm, rate);
 		if (err < 0)
 			goto error;
@@ -406,20 +404,6 @@
 		if (err < 0)
 			goto error;
 
-		err = amdtp_stream_start(&tscm->tx_stream,
-				tscm->tx_resources.channel,
-				fw_parent_device(tscm->unit)->max_speed);
-		if (err < 0)
-			goto error;
-
-		if (!amdtp_stream_wait_callback(&tscm->tx_stream,
-						CALLBACK_TIMEOUT)) {
-			err = -ETIMEDOUT;
-			goto error;
-		}
-	}
-
-	if (!amdtp_stream_running(&tscm->rx_stream)) {
 		err = amdtp_stream_start(&tscm->rx_stream,
 				tscm->rx_resources.channel,
 				fw_parent_device(tscm->unit)->max_speed);
@@ -433,10 +417,24 @@
 		}
 	}
 
+	if (!amdtp_stream_running(&tscm->tx_stream)) {
+		err = amdtp_stream_start(&tscm->tx_stream,
+				tscm->tx_resources.channel,
+				fw_parent_device(tscm->unit)->max_speed);
+		if (err < 0)
+			goto error;
+
+		if (!amdtp_stream_wait_callback(&tscm->tx_stream,
+						CALLBACK_TIMEOUT)) {
+			err = -ETIMEDOUT;
+			goto error;
+		}
+	}
+
 	return 0;
 error:
-	amdtp_stream_stop(&tscm->tx_stream);
 	amdtp_stream_stop(&tscm->rx_stream);
+	amdtp_stream_stop(&tscm->tx_stream);
 
 	finish_session(tscm);
 	release_resources(tscm);
diff --git a/sound/firewire/tascam/tascam.c b/sound/firewire/tascam/tascam.c
index e281c33..9dc93a7 100644
--- a/sound/firewire/tascam/tascam.c
+++ b/sound/firewire/tascam/tascam.c
@@ -85,10 +85,8 @@
 	return 0;
 }
 
-static void tscm_card_free(struct snd_card *card)
+static void tscm_free(struct snd_tscm *tscm)
 {
-	struct snd_tscm *tscm = card->private_data;
-
 	snd_tscm_transaction_unregister(tscm);
 	snd_tscm_stream_destroy_duplex(tscm);
 
@@ -97,44 +95,36 @@
 	mutex_destroy(&tscm->mutex);
 }
 
-static int snd_tscm_probe(struct fw_unit *unit,
-			   const struct ieee1394_device_id *entry)
+static void tscm_card_free(struct snd_card *card)
 {
-	struct snd_card *card;
-	struct snd_tscm *tscm;
+	tscm_free(card->private_data);
+}
+
+static void do_registration(struct work_struct *work)
+{
+	struct snd_tscm *tscm = container_of(work, struct snd_tscm, dwork.work);
 	int err;
 
-	/* create card */
-	err = snd_card_new(&unit->device, -1, NULL, THIS_MODULE,
-			   sizeof(struct snd_tscm), &card);
+	err = snd_card_new(&tscm->unit->device, -1, NULL, THIS_MODULE, 0,
+			   &tscm->card);
 	if (err < 0)
-		return err;
-	card->private_free = tscm_card_free;
-
-	/* initialize myself */
-	tscm = card->private_data;
-	tscm->card = card;
-	tscm->unit = fw_unit_get(unit);
-
-	mutex_init(&tscm->mutex);
-	spin_lock_init(&tscm->lock);
-	init_waitqueue_head(&tscm->hwdep_wait);
+		return;
 
 	err = identify_model(tscm);
 	if (err < 0)
 		goto error;
 
-	snd_tscm_proc_init(tscm);
+	err = snd_tscm_transaction_register(tscm);
+	if (err < 0)
+		goto error;
 
 	err = snd_tscm_stream_init_duplex(tscm);
 	if (err < 0)
 		goto error;
 
-	err = snd_tscm_create_pcm_devices(tscm);
-	if (err < 0)
-		goto error;
+	snd_tscm_proc_init(tscm);
 
-	err = snd_tscm_transaction_register(tscm);
+	err = snd_tscm_create_pcm_devices(tscm);
 	if (err < 0)
 		goto error;
 
@@ -146,35 +136,91 @@
 	if (err < 0)
 		goto error;
 
-	err = snd_card_register(card);
+	err = snd_card_register(tscm->card);
 	if (err < 0)
 		goto error;
 
+	/*
+	 * After registered, tscm instance can be released corresponding to
+	 * releasing the sound card instance.
+	 */
+	tscm->card->private_free = tscm_card_free;
+	tscm->card->private_data = tscm;
+	tscm->registered = true;
+
+	return;
+error:
+	snd_tscm_transaction_unregister(tscm);
+	snd_tscm_stream_destroy_duplex(tscm);
+	snd_card_free(tscm->card);
+	dev_info(&tscm->unit->device,
+		 "Sound card registration failed: %d\n", err);
+}
+
+static int snd_tscm_probe(struct fw_unit *unit,
+			   const struct ieee1394_device_id *entry)
+{
+	struct snd_tscm *tscm;
+
+	/* Allocate this independent of sound card instance. */
+	tscm = kzalloc(sizeof(struct snd_tscm), GFP_KERNEL);
+	if (tscm == NULL)
+		return -ENOMEM;
+
+	/* initialize myself */
+	tscm->unit = fw_unit_get(unit);
 	dev_set_drvdata(&unit->device, tscm);
 
-	return err;
-error:
-	snd_card_free(card);
-	return err;
+	mutex_init(&tscm->mutex);
+	spin_lock_init(&tscm->lock);
+	init_waitqueue_head(&tscm->hwdep_wait);
+
+	/* Allocate and register this sound card later. */
+	INIT_DEFERRABLE_WORK(&tscm->dwork, do_registration);
+	snd_fw_schedule_registration(unit, &tscm->dwork);
+
+	return 0;
 }
 
 static void snd_tscm_update(struct fw_unit *unit)
 {
 	struct snd_tscm *tscm = dev_get_drvdata(&unit->device);
 
+	/* Postpone a workqueue for deferred registration. */
+	if (!tscm->registered)
+		snd_fw_schedule_registration(unit, &tscm->dwork);
+
 	snd_tscm_transaction_reregister(tscm);
 
-	mutex_lock(&tscm->mutex);
-	snd_tscm_stream_update_duplex(tscm);
-	mutex_unlock(&tscm->mutex);
+	/*
+	 * After registration, userspace can start packet streaming, then this
+	 * code block works fine.
+	 */
+	if (tscm->registered) {
+		mutex_lock(&tscm->mutex);
+		snd_tscm_stream_update_duplex(tscm);
+		mutex_unlock(&tscm->mutex);
+	}
 }
 
 static void snd_tscm_remove(struct fw_unit *unit)
 {
 	struct snd_tscm *tscm = dev_get_drvdata(&unit->device);
 
-	/* No need to wait for releasing card object in this context. */
-	snd_card_free_when_closed(tscm->card);
+	/*
+	 * Confirm to stop the work for registration before the sound card is
+	 * going to be released. The work is not scheduled again because bus
+	 * reset handler is not called anymore.
+	 */
+	cancel_delayed_work_sync(&tscm->dwork);
+
+	if (tscm->registered) {
+		/* No need to wait for releasing card object in this context. */
+		snd_card_free_when_closed(tscm->card);
+	} else {
+		/* Don't forget this case. */
+		tscm_free(tscm);
+	}
 }
 
 static const struct ieee1394_device_id snd_tscm_id_table[] = {
diff --git a/sound/firewire/tascam/tascam.h b/sound/firewire/tascam/tascam.h
index 30ab77e..1f61011 100644
--- a/sound/firewire/tascam/tascam.h
+++ b/sound/firewire/tascam/tascam.h
@@ -51,6 +51,8 @@
 	struct mutex mutex;
 	spinlock_t lock;
 
+	bool registered;
+	struct delayed_work dwork;
 	const struct snd_tscm_spec *spec;
 
 	struct fw_iso_resources tx_resources;
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 2433f7c..31b510c 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -105,6 +105,9 @@
 	INIT_LIST_HEAD(&ebus->hlink_list);
 	ebus->idx = idx++;
 
+	mutex_init(&ebus->lock);
+	ebus->cmd_dma_state = true;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_init);
@@ -144,6 +147,7 @@
 	if (!edev)
 		return -ENOMEM;
 	hdev = &edev->hdac;
+	edev->ebus = ebus;
 
 	snprintf(name, sizeof(name), "ehdaudio%dD%d", ebus->idx, addr);
 
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index 548cc1e..860f8ca 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -186,6 +186,9 @@
 		hlink->lcaps  = readl(hlink->ml_addr + AZX_REG_ML_LCAP);
 		hlink->lsdiid = readw(hlink->ml_addr + AZX_REG_ML_LSDIID);
 
+		/* since link in On, update the ref */
+		hlink->ref_count = 1;
+
 		list_add_tail(&hlink->list, &ebus->hlink_list);
 	}
 
@@ -327,3 +330,66 @@
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_power_down_all);
+
+int snd_hdac_ext_bus_link_get(struct hdac_ext_bus *ebus,
+				struct hdac_ext_link *link)
+{
+	int ret = 0;
+
+	mutex_lock(&ebus->lock);
+
+	/*
+	 * if we move from 0 to 1, count will be 1 so power up this link
+	 * as well, also check the dma status and trigger that
+	 */
+	if (++link->ref_count == 1) {
+		if (!ebus->cmd_dma_state) {
+			snd_hdac_bus_init_cmd_io(&ebus->bus);
+			ebus->cmd_dma_state = true;
+		}
+
+		ret = snd_hdac_ext_bus_link_power_up(link);
+	}
+
+	mutex_unlock(&ebus->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_get);
+
+int snd_hdac_ext_bus_link_put(struct hdac_ext_bus *ebus,
+				struct hdac_ext_link *link)
+{
+	int ret = 0;
+	struct hdac_ext_link *hlink;
+	bool link_up = false;
+
+	mutex_lock(&ebus->lock);
+
+	/*
+	 * if we move from 1 to 0, count will be 0
+	 * so power down this link as well
+	 */
+	if (--link->ref_count == 0) {
+		ret = snd_hdac_ext_bus_link_power_down(link);
+
+		/*
+		 * now check if all links are off, if so turn off
+		 * cmd dma as well
+		 */
+		list_for_each_entry(hlink, &ebus->hlink_list, list) {
+			if (hlink->ref_count) {
+				link_up = true;
+				break;
+			}
+		}
+
+		if (!link_up) {
+			snd_hdac_bus_stop_cmd_io(&ebus->bus);
+			ebus->cmd_dma_state = false;
+		}
+	}
+
+	mutex_unlock(&ebus->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_put);
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index 8c48623..9fee464 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -80,6 +80,22 @@
 }
 EXPORT_SYMBOL_GPL(snd_hdac_bus_init_cmd_io);
 
+/* wait for cmd dmas till they are stopped */
+static void hdac_wait_for_cmd_dmas(struct hdac_bus *bus)
+{
+	unsigned long timeout;
+
+	timeout = jiffies + msecs_to_jiffies(100);
+	while ((snd_hdac_chip_readb(bus, RIRBCTL) & AZX_RBCTL_DMA_EN)
+		&& time_before(jiffies, timeout))
+		udelay(10);
+
+	timeout = jiffies + msecs_to_jiffies(100);
+	while ((snd_hdac_chip_readb(bus, CORBCTL) & AZX_CORBCTL_RUN)
+		&& time_before(jiffies, timeout))
+		udelay(10);
+}
+
 /**
  * snd_hdac_bus_stop_cmd_io - clean up CORB/RIRB buffers
  * @bus: HD-audio core bus
@@ -90,6 +106,7 @@
 	/* disable ringbuffer DMAs */
 	snd_hdac_chip_writeb(bus, RIRBCTL, 0);
 	snd_hdac_chip_writeb(bus, CORBCTL, 0);
+	hdac_wait_for_cmd_dmas(bus);
 	/* disable unsolicited responses */
 	snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, 0);
 	spin_unlock_irq(&bus->reg_lock);
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 607bbea..c9af022 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -158,22 +158,40 @@
 }
 EXPORT_SYMBOL_GPL(snd_hdac_i915_set_bclk);
 
-/* There is a fixed mapping between audio pin node and display port
- * on current Intel platforms:
+/* There is a fixed mapping between audio pin node and display port.
+ * on SNB, IVY, HSW, BSW, SKL, BXT, KBL:
  * Pin Widget 5 - PORT B (port = 1 in i915 driver)
  * Pin Widget 6 - PORT C (port = 2 in i915 driver)
  * Pin Widget 7 - PORT D (port = 3 in i915 driver)
+ *
+ * on VLV, ILK:
+ * Pin Widget 4 - PORT B (port = 1 in i915 driver)
+ * Pin Widget 5 - PORT C (port = 2 in i915 driver)
+ * Pin Widget 6 - PORT D (port = 3 in i915 driver)
  */
-static int pin2port(hda_nid_t pin_nid)
+static int pin2port(struct hdac_device *codec, hda_nid_t pin_nid)
 {
-	if (WARN_ON(pin_nid < 5 || pin_nid > 7))
+	int base_nid;
+
+	switch (codec->vendor_id) {
+	case 0x80860054: /* ILK */
+	case 0x80862804: /* ILK */
+	case 0x80862882: /* VLV */
+		base_nid = 3;
+		break;
+	default:
+		base_nid = 4;
+		break;
+	}
+
+	if (WARN_ON(pin_nid <= base_nid || pin_nid > base_nid + 3))
 		return -1;
-	return pin_nid - 4;
+	return pin_nid - base_nid;
 }
 
 /**
  * snd_hdac_sync_audio_rate - Set N/CTS based on the sample rate
- * @bus: HDA core bus
+ * @codec: HDA codec
  * @nid: the pin widget NID
  * @rate: the sample rate to set
  *
@@ -183,14 +201,15 @@
  * This function sets N/CTS value based on the given sample rate.
  * Returns zero for success, or a negative error code.
  */
-int snd_hdac_sync_audio_rate(struct hdac_bus *bus, hda_nid_t nid, int rate)
+int snd_hdac_sync_audio_rate(struct hdac_device *codec, hda_nid_t nid, int rate)
 {
+	struct hdac_bus *bus = codec->bus;
 	struct i915_audio_component *acomp = bus->audio_component;
 	int port;
 
 	if (!acomp || !acomp->ops || !acomp->ops->sync_audio_rate)
 		return -ENODEV;
-	port = pin2port(nid);
+	port = pin2port(codec, nid);
 	if (port < 0)
 		return -EINVAL;
 	return acomp->ops->sync_audio_rate(acomp->dev, port, rate);
@@ -199,7 +218,7 @@
 
 /**
  * snd_hdac_acomp_get_eld - Get the audio state and ELD via component
- * @bus: HDA core bus
+ * @codec: HDA codec
  * @nid: the pin widget NID
  * @audio_enabled: the pointer to store the current audio state
  * @buffer: the buffer pointer to store ELD bytes
@@ -217,16 +236,17 @@
  * thus it may be over @max_bytes.  If it's over @max_bytes, it implies
  * that only a part of ELD bytes have been fetched.
  */
-int snd_hdac_acomp_get_eld(struct hdac_bus *bus, hda_nid_t nid,
+int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid,
 			   bool *audio_enabled, char *buffer, int max_bytes)
 {
+	struct hdac_bus *bus = codec->bus;
 	struct i915_audio_component *acomp = bus->audio_component;
 	int port;
 
 	if (!acomp || !acomp->ops || !acomp->ops->get_eld)
 		return -ENODEV;
 
-	port = pin2port(nid);
+	port = pin2port(codec, nid);
 	if (port < 0)
 		return -EINVAL;
 	return acomp->ops->get_eld(acomp->dev, port, audio_enabled,
@@ -338,6 +358,9 @@
 	struct i915_audio_component *acomp;
 	int ret;
 
+	if (WARN_ON(hdac_acomp))
+		return -EBUSY;
+
 	if (!i915_gfx_present())
 		return -ENODEV;
 
@@ -371,6 +394,7 @@
 out_err:
 	kfree(acomp);
 	bus->audio_component = NULL;
+	hdac_acomp = NULL;
 	dev_info(dev, "failed to add i915 component master (%d)\n", ret);
 
 	return ret;
@@ -404,6 +428,7 @@
 
 	kfree(acomp);
 	bus->audio_component = NULL;
+	hdac_acomp = NULL;
 
 	return 0;
 }
diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c
index d7ec862..c6c75e7 100644
--- a/sound/hda/hdmi_chmap.c
+++ b/sound/hda/hdmi_chmap.c
@@ -625,13 +625,30 @@
 	WARN_ON(count != channels);
 }
 
+static int spk_mask_from_spk_alloc(int spk_alloc)
+{
+	int i;
+	int spk_mask = eld_speaker_allocation_bits[0];
+
+	for (i = 0; i < ARRAY_SIZE(eld_speaker_allocation_bits); i++) {
+		if (spk_alloc & (1 << i))
+			spk_mask |= eld_speaker_allocation_bits[i];
+	}
+
+	return spk_mask;
+}
+
 static int hdmi_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 			      unsigned int size, unsigned int __user *tlv)
 {
 	struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol);
 	struct hdac_chmap *chmap = info->private_data;
+	int pcm_idx = kcontrol->private_value;
 	unsigned int __user *dst;
 	int chs, count = 0;
+	unsigned long max_chs;
+	int type;
+	int spk_alloc, spk_mask;
 
 	if (size < 8)
 		return -ENOMEM;
@@ -639,40 +656,59 @@
 		return -EFAULT;
 	size -= 8;
 	dst = tlv + 2;
-	for (chs = 2; chs <= chmap->channels_max; chs++) {
+
+	spk_alloc = chmap->ops.get_spk_alloc(chmap->hdac, pcm_idx);
+	spk_mask = spk_mask_from_spk_alloc(spk_alloc);
+
+	max_chs = hweight_long(spk_mask);
+
+	for (chs = 2; chs <= max_chs; chs++) {
 		int i;
 		struct hdac_cea_channel_speaker_allocation *cap;
 
 		cap = channel_allocations;
 		for (i = 0; i < ARRAY_SIZE(channel_allocations); i++, cap++) {
 			int chs_bytes = chs * 4;
-			int type = chmap->ops.chmap_cea_alloc_validate_get_type(
-								chmap, cap, chs);
 			unsigned int tlv_chmap[8];
 
-			if (type < 0)
+			if (cap->channels != chs)
 				continue;
+
+			if (!(cap->spk_mask == (spk_mask & cap->spk_mask)))
+				continue;
+
+			type = chmap->ops.chmap_cea_alloc_validate_get_type(
+							chmap, cap, chs);
+			if (type < 0)
+				return -ENODEV;
 			if (size < 8)
 				return -ENOMEM;
+
 			if (put_user(type, dst) ||
 			    put_user(chs_bytes, dst + 1))
 				return -EFAULT;
+
 			dst += 2;
 			size -= 8;
 			count += 8;
+
 			if (size < chs_bytes)
 				return -ENOMEM;
+
 			size -= chs_bytes;
 			count += chs_bytes;
 			chmap->ops.cea_alloc_to_tlv_chmap(chmap, cap,
 						tlv_chmap, chs);
+
 			if (copy_to_user(dst, tlv_chmap, chs_bytes))
 				return -EFAULT;
 			dst += chs;
 		}
 	}
+
 	if (put_user(count, tlv + 1))
 		return -EFAULT;
+
 	return 0;
 }
 
diff --git a/sound/hda/local.h b/sound/hda/local.h
index d692f41..0d5bb15 100644
--- a/sound/hda/local.h
+++ b/sound/hda/local.h
@@ -16,6 +16,16 @@
 	return (wcaps & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT;
 }
 
+static inline unsigned int get_wcaps_channels(u32 wcaps)
+{
+	unsigned int chans;
+
+	chans = (wcaps & AC_WCAP_CHAN_CNT_EXT) >> 13;
+	chans = (chans + 1) * 2;
+
+	return chans;
+}
+
 extern const struct attribute_group *hdac_dev_attr_groups[];
 int hda_widget_sysfs_init(struct hdac_device *codec);
 void hda_widget_sysfs_exit(struct hdac_device *codec);
diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c
index 69f76ff..718d5e3 100644
--- a/sound/isa/wavefront/wavefront_synth.c
+++ b/sound/isa/wavefront/wavefront_synth.c
@@ -785,6 +785,9 @@
 	DPRINT (WF_DEBUG_LOAD_PATCH, "downloading patch %d\n",
 				      header->number);
 
+	if (header->number >= ARRAY_SIZE(dev->patch_status))
+		return -EINVAL;
+
 	dev->patch_status[header->number] |= WF_SLOT_FILLED;
 
 	bptr = buf;
@@ -809,6 +812,9 @@
 	DPRINT (WF_DEBUG_LOAD_PATCH, "downloading program %d\n",
 		header->number);
 
+	if (header->number >= ARRAY_SIZE(dev->prog_status))
+		return -EINVAL;
+
 	dev->prog_status[header->number] = WF_SLOT_USED;
 
 	/* XXX need to zero existing SLOT_USED bit for program_status[i]
@@ -898,6 +904,9 @@
 		header->number = x;
 	}
 
+	if (header->number >= WF_MAX_SAMPLE)
+		return -EINVAL;
+
 	if (header->size) {
 
 		/* XXX it's a debatable point whether or not RDONLY semantics
diff --git a/sound/oss/waveartist.c b/sound/oss/waveartist.c
index b36ea47..0b8d0de 100644
--- a/sound/oss/waveartist.c
+++ b/sound/oss/waveartist.c
@@ -1414,11 +1414,9 @@
 	else {
 #ifdef CONFIG_ARCH_NETWINDER
 		if (machine_is_netwinder()) {
-			init_timer(&vnc_timer);
-			vnc_timer.function = vnc_slider_tick;
-			vnc_timer.expires  = jiffies;
-			vnc_timer.data     = nr_waveartist_devs;
-			add_timer(&vnc_timer);
+			setup_timer(&vnc_timer, vnc_slider_tick,
+				    nr_waveartist_devs);
+			mod_timer(&vnc_timer, jiffies);
 
 			vnc_configure_mixer(devc, 0);
 
diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
index 4667c32..4a054d7 100644
--- a/sound/pci/au88x0/au88x0_core.c
+++ b/sound/pci/au88x0/au88x0_core.c
@@ -2151,8 +2151,7 @@
 							   stream->resources, en,
 							   VORTEX_RESOURCE_SRC)) < 0) {
 					memset(stream->resources, 0,
-					       sizeof(unsigned char) *
-					       VORTEX_RESOURCE_LAST);
+					       sizeof(stream->resources));
 					return -EBUSY;
 				}
 				if (stream->type != VORTEX_PCM_A3D) {
@@ -2162,7 +2161,7 @@
 								   VORTEX_RESOURCE_MIXIN)) < 0) {
 						memset(stream->resources,
 						       0,
-						       sizeof(unsigned char) * VORTEX_RESOURCE_LAST);
+						       sizeof(stream->resources));
 						return -EBUSY;
 					}
 				}
@@ -2175,8 +2174,7 @@
 						   stream->resources, en,
 						   VORTEX_RESOURCE_A3D)) < 0) {
 				memset(stream->resources, 0,
-				       sizeof(unsigned char) *
-				       VORTEX_RESOURCE_LAST);
+				       sizeof(stream->resources));
 				dev_err(vortex->card->dev,
 					"out of A3D sources. Sorry\n");
 				return -EBUSY;
@@ -2290,8 +2288,7 @@
 						   VORTEX_RESOURCE_MIXOUT))
 			    < 0) {
 				memset(stream->resources, 0,
-				       sizeof(unsigned char) *
-				       VORTEX_RESOURCE_LAST);
+				       sizeof(stream->resources));
 				return -EBUSY;
 			}
 			if ((src[i] =
@@ -2299,8 +2296,7 @@
 						   stream->resources, en,
 						   VORTEX_RESOURCE_SRC)) < 0) {
 				memset(stream->resources, 0,
-				       sizeof(unsigned char) *
-				       VORTEX_RESOURCE_LAST);
+				       sizeof(stream->resources));
 				return -EBUSY;
 			}
 		}
diff --git a/sound/pci/au88x0/au88x0_pcm.c b/sound/pci/au88x0/au88x0_pcm.c
index a6d6d8d..df5741a 100644
--- a/sound/pci/au88x0/au88x0_pcm.c
+++ b/sound/pci/au88x0/au88x0_pcm.c
@@ -432,7 +432,10 @@
 #endif
 	//printk(KERN_INFO "vortex: pointer = 0x%x\n", current_ptr);
 	spin_unlock(&chip->lock);
-	return (bytes_to_frames(substream->runtime, current_ptr));
+	current_ptr = bytes_to_frames(substream->runtime, current_ptr);
+	if (current_ptr >= substream->runtime->buffer_size)
+		current_ptr = 0;
+	return current_ptr;
 }
 
 /* operators */
diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
index a5d4604..8f94534 100644
--- a/sound/pci/ctxfi/cttimer.c
+++ b/sound/pci/ctxfi/cttimer.c
@@ -49,7 +49,7 @@
 	spinlock_t lock;		/* global timer lock (for xfitimer) */
 	spinlock_t list_lock;		/* lock for instance list */
 	struct ct_atc *atc;
-	struct ct_timer_ops *ops;
+	const struct ct_timer_ops *ops;
 	struct list_head instance_head;
 	struct list_head running_head;
 	unsigned int wc;		/* current wallclock */
@@ -128,7 +128,7 @@
 
 #define ct_systimer_free	ct_systimer_prepare
 
-static struct ct_timer_ops ct_systimer_ops = {
+static const struct ct_timer_ops ct_systimer_ops = {
 	.init = ct_systimer_init,
 	.free_instance = ct_systimer_free,
 	.prepare = ct_systimer_prepare,
@@ -322,7 +322,7 @@
 	ct_xfitimer_irq_stop(atimer);
 }
 
-static struct ct_timer_ops ct_xfitimer_ops = {
+static const struct ct_timer_ops ct_xfitimer_ops = {
 	.prepare = ct_xfitimer_prepare,
 	.start = ct_xfitimer_start,
 	.stop = ct_xfitimer_stop,
diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c
index 0dc44eb..626cd21 100644
--- a/sound/pci/ens1370.c
+++ b/sound/pci/ens1370.c
@@ -1548,7 +1548,7 @@
 	int val = 0;
 	
 	spin_lock_irq(&ensoniq->reg_lock);
-	if ((ensoniq->ctrl & ES_1371_GPIO_OUTM) >= 4)
+	if (ensoniq->ctrl & ES_1371_GPIO_OUT(4))
 	    	val = 1;
 	ucontrol->value.integer.value[0] = val;
 	spin_unlock_irq(&ensoniq->reg_lock);
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index bb02c2d..7f3b5ed 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -50,9 +50,13 @@
 	bool "Allow dynamic codec reconfiguration"
 	help
 	  Say Y here to enable the HD-audio codec re-configuration feature.
-	  This adds the sysfs interfaces to allow user to clear the whole
-	  codec configuration, change the codec setup, add extra verbs,
-	  and re-configure the codec dynamically.
+	  It allows user to clear the whole codec configuration, change the
+	  codec setup, add extra verbs, and re-configure the codec dynamically.
+
+	  Note that this item alone doesn't provide the sysfs interface, but
+	  enables the feature just for the patch loader below.
+	  If you need the traditional sysfs entries for the manual interaction,
+	  turn on CONFIG_SND_HDA_HWDEP as well.
 
 config SND_HDA_INPUT_BEEP
 	bool "Support digital beep via input layer"
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index dfaf1a9..320445f 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -5434,6 +5434,7 @@
 	spec->cur_adc_stream_tag = stream_tag;
 	spec->cur_adc_format = format;
 	snd_hda_codec_setup_stream(codec, spec->cur_adc, stream_tag, 0, format);
+	call_pcm_capture_hook(hinfo, codec, substream, HDA_GEN_PCM_ACT_PREPARE);
 	return 0;
 }
 
@@ -5444,6 +5445,7 @@
 	struct hda_gen_spec *spec = codec->spec;
 	snd_hda_codec_cleanup_stream(codec, spec->cur_adc);
 	spec->cur_adc = 0;
+	call_pcm_capture_hook(hinfo, codec, substream, HDA_GEN_PCM_ACT_CLEANUP);
 	return 0;
 }
 
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index a010d70..d0d5ad8 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -114,6 +114,9 @@
 	int (*setup_stream)(struct hda_codec *codec, hda_nid_t cvt_nid,
 			    hda_nid_t pin_nid, u32 stream_tag, int format);
 
+	void (*pin_cvt_fixup)(struct hda_codec *codec,
+			      struct hdmi_spec_per_pin *per_pin,
+			      hda_nid_t cvt_nid);
 };
 
 struct hdmi_pcm {
@@ -684,7 +687,8 @@
 	if (!channels)
 		return;
 
-	if (is_haswell_plus(codec))
+	/* some HW (e.g. HSW+) needs reprogramming the amp at each time */
+	if (get_wcaps(codec, pin_nid) & AC_WCAP_OUT_AMP)
 		snd_hda_codec_write(codec, pin_nid, 0,
 					    AC_VERB_SET_AMP_GAIN_MUTE,
 					    AMP_OUT_UNMUTE);
@@ -864,9 +868,6 @@
 	struct hdmi_spec *spec = codec->spec;
 	int err;
 
-	if (is_haswell_plus(codec))
-		haswell_verify_D0(codec, cvt_nid, pin_nid);
-
 	err = spec->ops.pin_hbr_setup(codec, pin_nid, is_hbr_format(format));
 
 	if (err) {
@@ -884,7 +885,7 @@
  * of the pin.
  */
 static int hdmi_choose_cvt(struct hda_codec *codec,
-			int pin_idx, int *cvt_id, int *mux_id)
+			   int pin_idx, int *cvt_id)
 {
 	struct hdmi_spec *spec = codec->spec;
 	struct hdmi_spec_per_pin *per_pin;
@@ -925,8 +926,6 @@
 
 	if (cvt_id)
 		*cvt_id = cvt_idx;
-	if (mux_id)
-		*mux_id = mux_idx;
 
 	return 0;
 }
@@ -1019,9 +1018,6 @@
 	int mux_idx;
 	struct hdmi_spec *spec = codec->spec;
 
-	if (!is_haswell_plus(codec) && !is_valleyview_plus(codec))
-		return;
-
 	/* On Intel platform, the mapping of converter nid to
 	 * mux index of the pins are always the same.
 	 * The pin nid may be 0, this means all pins will not
@@ -1032,6 +1028,17 @@
 		intel_not_share_assigned_cvt(codec, pin_nid, mux_idx);
 }
 
+/* invoke the optional pin_cvt_fixup op; no-op when the codec sets none */
+static void pin_cvt_fixup(struct hda_codec *codec,
+			  struct hdmi_spec_per_pin *per_pin,
+			  hda_nid_t cvt_nid)
+{
+	struct hdmi_spec *spec = codec->spec;
+
+	if (spec->ops.pin_cvt_fixup)
+		spec->ops.pin_cvt_fixup(codec, per_pin, cvt_nid);
+}
+
 /* called in hdmi_pcm_open when no pin is assigned to the PCM
  * in dyn_pcm_assign mode.
  */
@@ -1049,7 +1056,7 @@
 	if (pcm_idx < 0)
 		return -EINVAL;
 
-	err = hdmi_choose_cvt(codec, -1, &cvt_idx, NULL);
+	err = hdmi_choose_cvt(codec, -1, &cvt_idx);
 	if (err)
 		return err;
 
@@ -1057,7 +1064,7 @@
 	per_cvt->assigned = 1;
 	hinfo->nid = per_cvt->cvt_nid;
 
-	intel_not_share_assigned_cvt_nid(codec, 0, per_cvt->cvt_nid);
+	pin_cvt_fixup(codec, NULL, per_cvt->cvt_nid);
 
 	set_bit(pcm_idx, &spec->pcm_in_use);
 	/* todo: setup spdif ctls assign */
@@ -1089,7 +1096,7 @@
 {
 	struct hdmi_spec *spec = codec->spec;
 	struct snd_pcm_runtime *runtime = substream->runtime;
-	int pin_idx, cvt_idx, pcm_idx, mux_idx = 0;
+	int pin_idx, cvt_idx, pcm_idx;
 	struct hdmi_spec_per_pin *per_pin;
 	struct hdmi_eld *eld;
 	struct hdmi_spec_per_cvt *per_cvt = NULL;
@@ -1118,7 +1125,7 @@
 		}
 	}
 
-	err = hdmi_choose_cvt(codec, pin_idx, &cvt_idx, &mux_idx);
+	err = hdmi_choose_cvt(codec, pin_idx, &cvt_idx);
 	if (err < 0) {
 		mutex_unlock(&spec->pcm_lock);
 		return err;
@@ -1135,11 +1142,10 @@
 
 	snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0,
 			    AC_VERB_SET_CONNECT_SEL,
-			    mux_idx);
+			    per_pin->mux_idx);
 
 	/* configure unused pins to choose other converters */
-	if (is_haswell_plus(codec) || is_valleyview_plus(codec))
-		intel_not_share_assigned_cvt(codec, per_pin->pin_nid, mux_idx);
+	pin_cvt_fixup(codec, per_pin, 0);
 
 	snd_hda_spdif_ctls_assign(codec, pcm_idx, per_cvt->cvt_nid);
 
@@ -1372,12 +1378,7 @@
 	 *   and this can make HW reset converter selection on a pin.
 	 */
 	if (eld->eld_valid && !old_eld_valid && per_pin->setup) {
-		if (is_haswell_plus(codec) || is_valleyview_plus(codec)) {
-			intel_verify_pin_cvt_connect(codec, per_pin);
-			intel_not_share_assigned_cvt(codec, per_pin->pin_nid,
-						     per_pin->mux_idx);
-		}
-
+		pin_cvt_fixup(codec, per_pin, 0);
 		hdmi_setup_audio_infoframe(codec, per_pin, per_pin->non_pcm);
 	}
 
@@ -1484,7 +1485,7 @@
 
 	mutex_lock(&per_pin->lock);
 	eld->monitor_present = false;
-	size = snd_hdac_acomp_get_eld(&codec->bus->core, per_pin->pin_nid,
+	size = snd_hdac_acomp_get_eld(&codec->core, per_pin->pin_nid,
 				      &eld->monitor_present, eld->eld_buffer,
 				      ELD_MAX_SIZE);
 	if (size > 0) {
@@ -1711,7 +1712,7 @@
 		 * skip pin setup and return 0 to make audio playback
 		 * be ongoing
 		 */
-		intel_not_share_assigned_cvt_nid(codec, 0, cvt_nid);
+		pin_cvt_fixup(codec, NULL, cvt_nid);
 		snd_hda_codec_setup_stream(codec, cvt_nid,
 					stream_tag, 0, format);
 		mutex_unlock(&spec->pcm_lock);
@@ -1724,23 +1725,21 @@
 	}
 	per_pin = get_pin(spec, pin_idx);
 	pin_nid = per_pin->pin_nid;
-	if (is_haswell_plus(codec) || is_valleyview_plus(codec)) {
-		/* Verify pin:cvt selections to avoid silent audio after S3.
-		 * After S3, the audio driver restores pin:cvt selections
-		 * but this can happen before gfx is ready and such selection
-		 * is overlooked by HW. Thus multiple pins can share a same
-		 * default convertor and mute control will affect each other,
-		 * which can cause a resumed audio playback become silent
-		 * after S3.
-		 */
-		intel_verify_pin_cvt_connect(codec, per_pin);
-		intel_not_share_assigned_cvt(codec, pin_nid, per_pin->mux_idx);
-	}
+
+	/* Verify pin:cvt selections to avoid silent audio after S3.
+	 * After S3, the audio driver restores pin:cvt selections
+	 * but this can happen before gfx is ready and such selection
+	 * is overlooked by HW. Thus multiple pins can share a same
+	 * default convertor and mute control will affect each other,
+	 * which can cause a resumed audio playback become silent
+	 * after S3.
+	 */
+	pin_cvt_fixup(codec, per_pin, 0);
 
 	/* Call sync_audio_rate to set the N/CTS/M manually if necessary */
 	/* Todo: add DP1.2 MST audio support later */
 	if (codec_has_acomp(codec))
-		snd_hdac_sync_audio_rate(&codec->bus->core, pin_nid, runtime->rate);
+		snd_hdac_sync_audio_rate(&codec->core, pin_nid, runtime->rate);
 
 	non_pcm = check_non_pcm_per_cvt(codec, cvt_nid);
 	mutex_lock(&per_pin->lock);
@@ -1837,6 +1836,18 @@
 	.cleanup = generic_hdmi_playback_pcm_cleanup,
 };
 
+static int hdmi_get_spk_alloc(struct hdac_device *hdac, int pcm_idx)
+{
+	struct hda_codec *codec = container_of(hdac, struct hda_codec, core);
+	struct hdmi_spec *spec = codec->spec;
+	struct hdmi_spec_per_pin *per_pin = pcm_idx_to_pin(spec, pcm_idx);
+
+	if (!per_pin)
+		return 0;
+
+	return per_pin->sink_eld.info.spk_alloc;
+}
+
 static void hdmi_get_chmap(struct hdac_device *hdac, int pcm_idx,
 					unsigned char *chmap)
 {
@@ -2075,6 +2086,20 @@
 	snd_array_free(&spec->cvts);
 }
 
+static void generic_spec_free(struct hda_codec *codec)
+{
+	struct hdmi_spec *spec = codec->spec;
+
+	if (spec) {
+		if (spec->i915_bound)
+			snd_hdac_i915_exit(&codec->bus->core);
+		hdmi_array_free(spec);
+		kfree(spec);
+		codec->spec = NULL;
+	}
+	codec->dp_mst = false;
+}
+
 static void generic_hdmi_free(struct hda_codec *codec)
 {
 	struct hdmi_spec *spec = codec->spec;
@@ -2099,10 +2124,7 @@
 			spec->pcm_rec[pcm_idx].jack = NULL;
 	}
 
-	if (spec->i915_bound)
-		snd_hdac_i915_exit(&codec->bus->core);
-	hdmi_array_free(spec);
-	kfree(spec);
+	generic_spec_free(codec);
 }
 
 #ifdef CONFIG_PM
@@ -2140,6 +2162,55 @@
 	.setup_stream				= hdmi_setup_stream,
 };
 
+/* allocate codec->spec and assign/initialize generic parser ops; 0 or -ENOMEM */
+static int alloc_generic_hdmi(struct hda_codec *codec)
+{
+	struct hdmi_spec *spec;
+
+	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	spec->ops = generic_standard_hdmi_ops;
+	mutex_init(&spec->pcm_lock);
+	snd_hdac_register_chmap_ops(&codec->core, &spec->chmap);
+
+	spec->chmap.ops.get_chmap = hdmi_get_chmap;
+	spec->chmap.ops.set_chmap = hdmi_set_chmap;
+	spec->chmap.ops.is_pcm_attached = is_hdmi_pcm_attached;
+	spec->chmap.ops.get_spk_alloc = hdmi_get_spk_alloc;
+
+	codec->spec = spec;
+	hdmi_array_init(spec, 4);
+
+	codec->patch_ops = generic_hdmi_patch_ops;
+
+	return 0;
+}
+
+/* generic HDMI parser */
+static int patch_generic_hdmi(struct hda_codec *codec)
+{
+	int err;
+
+	err = alloc_generic_hdmi(codec);
+	if (err < 0)
+		return err;
+
+	err = hdmi_parse_codec(codec);
+	if (err < 0) {
+		generic_spec_free(codec);
+		return err;
+	}
+
+	generic_hdmi_init_per_pins(codec);
+	return 0;
+}
+
+/*
+ * Intel codec parsers and helpers
+ */
+
 static void intel_haswell_fixup_connect_list(struct hda_codec *codec,
 					     hda_nid_t nid)
 {
@@ -2217,12 +2288,23 @@
 static void intel_pin_eld_notify(void *audio_ptr, int port)
 {
 	struct hda_codec *codec = audio_ptr;
-	int pin_nid = port + 0x04;
+	int pin_nid;
 
 	/* we assume only from port-B to port-D */
 	if (port < 1 || port > 3)
 		return;
 
+	switch (codec->core.vendor_id) {
+	case 0x80860054: /* ILK */
+	case 0x80862804: /* ILK */
+	case 0x80862882: /* VLV */
+		pin_nid = port + 0x03;
+		break;
+	default:
+		pin_nid = port + 0x04;
+		break;
+	}
+
 	/* skip notification during system suspend (but not in runtime PM);
 	 * the state will be updated at resume
 	 */
@@ -2236,95 +2318,161 @@
 	check_presence_and_report(codec, pin_nid);
 }
 
-static int patch_generic_hdmi(struct hda_codec *codec)
+/* register i915 component pin_eld_notify callback */
+static void register_i915_notifier(struct hda_codec *codec)
+{
+	struct hdmi_spec *spec = codec->spec;
+
+	spec->use_acomp_notifier = true;
+	spec->i915_audio_ops.audio_ptr = codec;
+	/* intel_audio_codec_enable() or intel_audio_codec_disable()
+	 * will call pin_eld_notify using the audio_ptr pointer, so
+	 * audio_ptr must be written before the callback is installed
+	 */
+	wmb();
+	spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
+	snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
+}
+
+/* setup_stream ops override for HSW+ */
+static int i915_hsw_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid,
+				 hda_nid_t pin_nid, u32 stream_tag, int format)
+{
+	haswell_verify_D0(codec, cvt_nid, pin_nid);
+	return hdmi_setup_stream(codec, cvt_nid, pin_nid, stream_tag, format);
+}
+
+/* pin_cvt_fixup ops override for HSW+ and VLV+ */
+static void i915_pin_cvt_fixup(struct hda_codec *codec,
+			       struct hdmi_spec_per_pin *per_pin,
+			       hda_nid_t cvt_nid)
+{
+	if (per_pin) {
+		intel_verify_pin_cvt_connect(codec, per_pin);
+		intel_not_share_assigned_cvt(codec, per_pin->pin_nid,
+					     per_pin->mux_idx);
+	} else {
+		intel_not_share_assigned_cvt_nid(codec, 0, cvt_nid);
+	}
+}
+
+/* Intel Haswell and onwards; audio component with eld notifier */
+static int patch_i915_hsw_hdmi(struct hda_codec *codec)
 {
 	struct hdmi_spec *spec;
+	int err;
 
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
-	if (spec == NULL)
-		return -ENOMEM;
-
-	spec->ops = generic_standard_hdmi_ops;
-	mutex_init(&spec->pcm_lock);
-	snd_hdac_register_chmap_ops(&codec->core, &spec->chmap);
-
-	spec->chmap.ops.get_chmap = hdmi_get_chmap;
-	spec->chmap.ops.set_chmap = hdmi_set_chmap;
-	spec->chmap.ops.is_pcm_attached = is_hdmi_pcm_attached;
-
-	codec->spec = spec;
-	hdmi_array_init(spec, 4);
-
-#ifdef CONFIG_SND_HDA_I915
-	/* Try to bind with i915 for Intel HSW+ codecs (if not done yet) */
-	if ((codec->core.vendor_id >> 16) == 0x8086 &&
-	    is_haswell_plus(codec)) {
-#if 0
-		/* on-demand binding leads to an unbalanced refcount when
-		 * both i915 and hda drivers are probed concurrently;
-		 * disabled temporarily for now
-		 */
-		if (!codec->bus->core.audio_component)
-			if (!snd_hdac_i915_init(&codec->bus->core))
-				spec->i915_bound = true;
-#endif
-		/* use i915 audio component notifier for hotplug */
-		if (codec->bus->core.audio_component)
-			spec->use_acomp_notifier = true;
+	/* HSW+ requires i915 binding */
+	if (!codec->bus->core.audio_component) {
+		codec_info(codec, "No i915 binding for Intel HDMI/DP codec\n");
+		return -ENODEV;
 	}
-#endif
 
-	if (is_haswell_plus(codec)) {
-		intel_haswell_enable_all_pins(codec, true);
-		intel_haswell_fixup_enable_dp12(codec);
+	err = alloc_generic_hdmi(codec);
+	if (err < 0)
+		return err;
+	spec = codec->spec;
+
+	intel_haswell_enable_all_pins(codec, true);
+	intel_haswell_fixup_enable_dp12(codec);
+
+	/* For Haswell/Broadwell, the controller is also in the power well and
+	 * can cover the codec power request, and so need not set this flag.
+	 */
+	if (!is_haswell(codec) && !is_broadwell(codec))
+		codec->core.link_power_control = 1;
+
+	codec->patch_ops.set_power_state = haswell_set_power_state;
+	codec->dp_mst = true;
+	codec->depop_delay = 0;
+	codec->auto_runtime_pm = 1;
+
+	spec->ops.setup_stream = i915_hsw_setup_stream;
+	spec->ops.pin_cvt_fixup = i915_pin_cvt_fixup;
+
+	err = hdmi_parse_codec(codec);
+	if (err < 0) {
+		generic_spec_free(codec);
+		return err;
 	}
 
+	generic_hdmi_init_per_pins(codec);
+	register_i915_notifier(codec);
+	return 0;
+}
+
+/* Intel Baytrail and Braswell; with eld notifier */
+static int patch_i915_byt_hdmi(struct hda_codec *codec)
+{
+	struct hdmi_spec *spec;
+	int err;
+
+	/* requires i915 binding */
+	if (!codec->bus->core.audio_component) {
+		codec_info(codec, "No i915 binding for Intel HDMI/DP codec\n");
+		return -ENODEV;
+	}
+
+	err = alloc_generic_hdmi(codec);
+	if (err < 0)
+		return err;
+	spec = codec->spec;
+
 	/* For Valleyview/Cherryview, only the display codec is in the display
 	 * power well and can use link_power ops to request/release the power.
-	 * For Haswell/Broadwell, the controller is also in the power well and
-	 * can cover the codec power request, and so need not set this flag.
-	 * For previous platforms, there is no such power well feature.
 	 */
-	if (is_valleyview_plus(codec) || is_skylake(codec) ||
-			is_broxton(codec))
-		codec->core.link_power_control = 1;
+	codec->core.link_power_control = 1;
 
-	if (hdmi_parse_codec(codec) < 0) {
-		if (spec->i915_bound)
-			snd_hdac_i915_exit(&codec->bus->core);
-		codec->spec = NULL;
-		kfree(spec);
-		return -EINVAL;
-	}
-	codec->patch_ops = generic_hdmi_patch_ops;
-	if (is_haswell_plus(codec)) {
-		codec->patch_ops.set_power_state = haswell_set_power_state;
-		codec->dp_mst = true;
-	}
+	codec->depop_delay = 0;
+	codec->auto_runtime_pm = 1;
 
-	/* Enable runtime pm for HDMI audio codec of HSW/BDW/SKL/BYT/BSW */
-	if (is_haswell_plus(codec) || is_valleyview_plus(codec))
-		codec->auto_runtime_pm = 1;
+	spec->ops.pin_cvt_fixup = i915_pin_cvt_fixup;
+
+	err = hdmi_parse_codec(codec);
+	if (err < 0) {
+		generic_spec_free(codec);
+		return err;
+	}
 
 	generic_hdmi_init_per_pins(codec);
-
-
-	if (codec_has_acomp(codec)) {
-		codec->depop_delay = 0;
-		spec->i915_audio_ops.audio_ptr = codec;
-		/* intel_audio_codec_enable() or intel_audio_codec_disable()
-		 * will call pin_eld_notify with using audio_ptr pointer
-		 * We need make sure audio_ptr is really setup
-		 */
-		wmb();
-		spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
-		snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
-	}
-
-	WARN_ON(spec->dyn_pcm_assign && !codec_has_acomp(codec));
+	register_i915_notifier(codec);
 	return 0;
 }
 
+/* Intel IronLake, SandyBridge and IvyBridge; with eld notifier */
+static int patch_i915_cpt_hdmi(struct hda_codec *codec)
+{
+	struct hdmi_spec *spec;
+	int err;
+
+	/* no i915 component should have been bound before this */
+	if (WARN_ON(codec->bus->core.audio_component))
+		return -EBUSY;
+
+	err = alloc_generic_hdmi(codec);
+	if (err < 0)
+		return err;
+	spec = codec->spec;
+
+	/* Try to bind with i915 now */
+	err = snd_hdac_i915_init(&codec->bus->core);
+	if (err < 0)
+		goto error;
+	spec->i915_bound = true;
+
+	err = hdmi_parse_codec(codec);
+	if (err < 0)
+		goto error;
+
+	generic_hdmi_init_per_pins(codec);
+	register_i915_notifier(codec);
+	return 0;
+
+ error:
+	generic_spec_free(codec);
+	return err;
+}
+
 /*
  * Shared non-generic implementations
  */
@@ -3492,21 +3640,21 @@
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",	patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f84, "VX11 HDMI/DP",	patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x11069f85, "VX11 HDMI/DP",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80860054, "IbexPeak HDMI",	patch_generic_hdmi),
+HDA_CODEC_ENTRY(0x80860054, "IbexPeak HDMI",	patch_i915_cpt_hdmi),
 HDA_CODEC_ENTRY(0x80862801, "Bearlake HDMI",	patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862802, "Cantiga HDMI",	patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862803, "Eaglelake HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862804, "IbexPeak HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862805, "CougarPoint HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862806, "PantherPoint HDMI", patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862807, "Haswell HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",	patch_generic_hdmi),
+HDA_CODEC_ENTRY(0x80862804, "IbexPeak HDMI",	patch_i915_cpt_hdmi),
+HDA_CODEC_ENTRY(0x80862805, "CougarPoint HDMI",	patch_i915_cpt_hdmi),
+HDA_CODEC_ENTRY(0x80862806, "PantherPoint HDMI", patch_i915_cpt_hdmi),
+HDA_CODEC_ENTRY(0x80862807, "Haswell HDMI",	patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",	patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",	patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",	patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",	patch_generic_hdmi),
-HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",	patch_generic_hdmi),
+HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",	patch_i915_byt_hdmi),
+HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",	patch_i915_byt_hdmi),
 HDA_CODEC_ENTRY(0x808629fb, "Crestline HDMI",	patch_generic_hdmi),
 /* special ID for generic HDMI */
 HDA_CODEC_ENTRY(HDA_CODEC_ID_GENERIC_HDMI, "Generic HDMI", patch_generic_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4918ffa..002f153 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -342,6 +342,11 @@
 	case 0x10ec0293:
 		alc_update_coef_idx(codec, 0xa, 1<<13, 0);
 		break;
+	case 0x10ec0234:
+	case 0x10ec0274:
+	case 0x10ec0294:
+		alc_update_coef_idx(codec, 0x10, 1<<15, 0);
+		break;
 	case 0x10ec0662:
 		if ((coef & 0x00f0) == 0x0030)
 			alc_update_coef_idx(codec, 0x4, 1<<10, 0); /* EAPD Ctrl */
@@ -2647,6 +2652,7 @@
 	ALC269_TYPE_ALC255,
 	ALC269_TYPE_ALC256,
 	ALC269_TYPE_ALC225,
+	ALC269_TYPE_ALC294,
 };
 
 /*
@@ -2677,6 +2683,7 @@
 	case ALC269_TYPE_ALC255:
 	case ALC269_TYPE_ALC256:
 	case ALC269_TYPE_ALC225:
+	case ALC269_TYPE_ALC294:
 		ssids = alc269_ssids;
 		break;
 	default:
@@ -6028,6 +6035,11 @@
 	case 0x10ec0225:
 		spec->codec_variant = ALC269_TYPE_ALC225;
 		break;
+	case 0x10ec0234:
+	case 0x10ec0274:
+	case 0x10ec0294:
+		spec->codec_variant = ALC269_TYPE_ALC294;
+		break;
 	}
 
 	if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
@@ -6942,6 +6954,7 @@
 	HDA_CODEC_ENTRY(0x10ec0225, "ALC225", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0231, "ALC231", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269),
+	HDA_CODEC_ENTRY(0x10ec0234, "ALC234", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269),
@@ -6952,6 +6965,7 @@
 	HDA_CODEC_ENTRY(0x10ec0269, "ALC269", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0270, "ALC270", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0272, "ALC272", patch_alc662),
+	HDA_CODEC_ENTRY(0x10ec0274, "ALC274", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0275, "ALC275", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0276, "ALC276", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0280, "ALC280", patch_alc269),
@@ -6964,6 +6978,7 @@
 	HDA_CODEC_ENTRY(0x10ec0290, "ALC290", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0292, "ALC292", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0293, "ALC293", patch_alc269),
+	HDA_CODEC_ENTRY(0x10ec0294, "ALC294", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
 	HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
 	HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 8151318..9720a30 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -42,12 +42,6 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 
-#ifdef CONFIG_KVM_GUEST
-#include <linux/kvm_para.h>
-#else
-#define kvm_para_available() (0)
-#endif
-
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Intel 82801AA,82901AB,i810,i820,i830,i840,i845,MX440; SiS 7012; Ali 5455");
 MODULE_LICENSE("GPL");
@@ -2972,25 +2966,17 @@
 		goto fini;
 	}
 
-	/* detect KVM and Parallels virtual environments */
-	result = kvm_para_available();
-#ifdef X86_FEATURE_HYPERVISOR
-	result = result || boot_cpu_has(X86_FEATURE_HYPERVISOR);
-#endif
-	if (!result)
-		goto fini;
-
 	/* check for known (emulated) devices */
+	result = 0;
 	if (pci->subsystem_vendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET &&
 	    pci->subsystem_device == PCI_SUBDEVICE_ID_QEMU) {
 		/* KVM emulated sound, PCI SSID: 1af4:1100 */
 		msg = "enable KVM";
+		result = 1;
 	} else if (pci->subsystem_vendor == 0x1ab8) {
 		/* Parallels VM emulated sound, PCI SSID: 1ab8:xxxx */
 		msg = "enable Parallels VM";
-	} else {
-		msg = "disable (unknown or VT-d) VM";
-		result = 0;
+		result = 1;
 	}
 
 fini:
diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
index f3d6202..a80684b 100644
--- a/sound/pci/lx6464es/lx_core.c
+++ b/sound/pci/lx6464es/lx_core.c
@@ -644,7 +644,7 @@
 		if (err < 0)
 			return err;
 
-		if (current_state == state)
+		if (!err && current_state == state)
 			return 0;
 
 		mdelay(1);
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index 2768970..1267e1a 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -652,7 +652,7 @@
 		rcmr =	  SSC_BF(RCMR_PERIOD, ssc_p->rcmr_period)
 			| SSC_BF(RCMR_STTDLY, 1)
 			| SSC_BF(RCMR_START, SSC_START_RISING_RF)
-			| SSC_BF(RCMR_CKI, SSC_CKI_FALLING)
+			| SSC_BF(RCMR_CKI, SSC_CKI_RISING)
 			| SSC_BF(RCMR_CKO, SSC_CKO_NONE)
 			| SSC_BF(RCMR_CKS, SSC_CKS_DIV);
 
@@ -692,7 +692,7 @@
 		rcmr =	  SSC_BF(RCMR_PERIOD, 0)
 			| SSC_BF(RCMR_STTDLY, START_DELAY)
 			| SSC_BF(RCMR_START, SSC_START_RISING_RF)
-			| SSC_BF(RCMR_CKI, SSC_CKI_FALLING)
+			| SSC_BF(RCMR_CKI, SSC_CKI_RISING)
 			| SSC_BF(RCMR_CKO, SSC_CKO_NONE)
 			| SSC_BF(RCMR_CKS, ssc->clk_from_rk_pin ?
 					   SSC_CKS_PIN : SSC_CKS_CLOCK);
diff --git a/sound/soc/au1x/dbdma2.c b/sound/soc/au1x/dbdma2.c
index 5741c0a..b5d1caa 100644
--- a/sound/soc/au1x/dbdma2.c
+++ b/sound/soc/au1x/dbdma2.c
@@ -206,8 +206,8 @@
 	stype = substream->stream;
 	pcd = to_dmadata(substream);
 
-	DBG("runtime->dma_area = 0x%08lx dma_addr_t = 0x%08lx dma_size = %d "
-	    "runtime->min_align %d\n",
+	DBG("runtime->dma_area = 0x%08lx dma_addr_t = 0x%08lx dma_size = %zu "
+	    "runtime->min_align %lu\n",
 		(unsigned long)runtime->dma_area,
 		(unsigned long)runtime->dma_addr, runtime->dma_bytes,
 		runtime->min_align);
diff --git a/sound/soc/bcm/bcm2835-i2s.c b/sound/soc/bcm/bcm2835-i2s.c
index 1c1f221..6ba2049 100644
--- a/sound/soc/bcm/bcm2835-i2s.c
+++ b/sound/soc/bcm/bcm2835-i2s.c
@@ -259,6 +259,9 @@
 	case SNDRV_PCM_FORMAT_S16_LE:
 		data_length = 16;
 		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		data_length = 24;
+		break;
 	case SNDRV_PCM_FORMAT_S32_LE:
 		data_length = 32;
 		break;
@@ -273,13 +276,20 @@
 		/* otherwise calculate a fitting block ratio */
 		bclk_ratio = 2 * data_length;
 
-	/* set target clock rate*/
-	clk_set_rate(dev->clk, sampling_rate * bclk_ratio);
+	/* Clock should only be set up here if CPU is clock master */
+	switch (dev->fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFS:
+	case SND_SOC_DAIFMT_CBS_CFM:
+		clk_set_rate(dev->clk, sampling_rate * bclk_ratio);
+		break;
+	default:
+		break;
+	}
 
 	/* Setup the frame format */
 	format = BCM2835_I2S_CHEN;
 
-	if (data_length > 24)
+	if (data_length >= 24)
 		format |= BCM2835_I2S_CHWEX;
 
 	format |= BCM2835_I2S_CHWID((data_length-8)&0xf);
@@ -570,6 +580,7 @@
 		.channels_max = 2,
 		.rates =	SNDRV_PCM_RATE_8000_192000,
 		.formats =	SNDRV_PCM_FMTBIT_S16_LE
+				| SNDRV_PCM_FMTBIT_S24_LE
 				| SNDRV_PCM_FMTBIT_S32_LE
 		},
 	.capture = {
@@ -577,6 +588,7 @@
 		.channels_max = 2,
 		.rates =	SNDRV_PCM_RATE_8000_192000,
 		.formats =	SNDRV_PCM_FMTBIT_S16_LE
+				| SNDRV_PCM_FMTBIT_S24_LE
 				| SNDRV_PCM_FMTBIT_S32_LE
 		},
 	.ops = &bcm2835_i2s_dai_ops,
@@ -678,6 +690,15 @@
 	dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].maxburst = 2;
 	dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].maxburst = 2;
 
+	/*
+	 * Set the PACK flag to enable S16_LE support (2 S16_LE values
+	 * packed into 32-bit transfers).
+	 */
+	dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].flags =
+		SND_DMAENGINE_PCM_DAI_FLAG_PACK;
+	dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].flags =
+		SND_DMAENGINE_PCM_DAI_FLAG_PACK;
+
 	/* BCLK ratio - use default */
 	dev->bclk_ratio = 0;
 
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 7ef3a0c..b3afae9 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -88,12 +88,14 @@
 	select SND_SOC_MC13783 if MFD_MC13XXX
 	select SND_SOC_ML26124 if I2C
 	select SND_SOC_NAU8825 if I2C
+	select SND_SOC_HDMI_CODEC
 	select SND_SOC_PCM1681 if I2C
 	select SND_SOC_PCM179X_I2C if I2C
 	select SND_SOC_PCM179X_SPI if SPI_MASTER
 	select SND_SOC_PCM3008
 	select SND_SOC_PCM3168A_I2C if I2C
 	select SND_SOC_PCM3168A_SPI if SPI_MASTER
+	select SND_SOC_PCM5102A
 	select SND_SOC_PCM512x_I2C if I2C
 	select SND_SOC_PCM512x_SPI if SPI_MASTER
 	select SND_SOC_RT286 if I2C
@@ -477,6 +479,11 @@
 config SND_SOC_DMIC
 	tristate
 
+config SND_SOC_HDMI_CODEC
+       tristate
+       select SND_PCM_ELD
+       select SND_PCM_IEC958
+
 config SND_SOC_ES8328
 	tristate "Everest Semi ES8328 CODEC"
 
@@ -575,6 +582,9 @@
 	select SND_SOC_PCM3168A
 	select REGMAP_SPI
 
+config SND_SOC_PCM5102A
+	tristate
+
 config SND_SOC_PCM512x
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 185a712..b7b9941 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -81,6 +81,7 @@
 snd-soc-mc13783-objs := mc13783.o
 snd-soc-ml26124-objs := ml26124.o
 snd-soc-nau8825-objs := nau8825.o
+snd-soc-hdmi-codec-objs := hdmi-codec.o
 snd-soc-pcm1681-objs := pcm1681.o
 snd-soc-pcm179x-codec-objs := pcm179x.o
 snd-soc-pcm179x-i2c-objs := pcm179x-i2c.o
@@ -89,6 +90,7 @@
 snd-soc-pcm3168a-objs := pcm3168a.o
 snd-soc-pcm3168a-i2c-objs := pcm3168a-i2c.o
 snd-soc-pcm3168a-spi-objs := pcm3168a-spi.o
+snd-soc-pcm5102a-objs := pcm5102a.o
 snd-soc-pcm512x-objs := pcm512x.o
 snd-soc-pcm512x-i2c-objs := pcm512x-i2c.o
 snd-soc-pcm512x-spi-objs := pcm512x-spi.o
@@ -290,6 +292,7 @@
 obj-$(CONFIG_SND_SOC_MC13783)	+= snd-soc-mc13783.o
 obj-$(CONFIG_SND_SOC_ML26124)	+= snd-soc-ml26124.o
 obj-$(CONFIG_SND_SOC_NAU8825)   += snd-soc-nau8825.o
+obj-$(CONFIG_SND_SOC_HDMI_CODEC)	+= snd-soc-hdmi-codec.o
 obj-$(CONFIG_SND_SOC_PCM1681)	+= snd-soc-pcm1681.o
 obj-$(CONFIG_SND_SOC_PCM179X)	+= snd-soc-pcm179x-codec.o
 obj-$(CONFIG_SND_SOC_PCM179X_I2C)	+= snd-soc-pcm179x-i2c.o
@@ -298,6 +301,7 @@
 obj-$(CONFIG_SND_SOC_PCM3168A)	+= snd-soc-pcm3168a.o
 obj-$(CONFIG_SND_SOC_PCM3168A_I2C)	+= snd-soc-pcm3168a-i2c.o
 obj-$(CONFIG_SND_SOC_PCM3168A_SPI)	+= snd-soc-pcm3168a-spi.o
+obj-$(CONFIG_SND_SOC_PCM5102A)	+= snd-soc-pcm5102a.o
 obj-$(CONFIG_SND_SOC_PCM512x)	+= snd-soc-pcm512x.o
 obj-$(CONFIG_SND_SOC_PCM512x_I2C)	+= snd-soc-pcm512x-i2c.o
 obj-$(CONFIG_SND_SOC_PCM512x_SPI)	+= snd-soc-pcm512x-spi.o
diff --git a/sound/soc/codecs/ak4642.c b/sound/soc/codecs/ak4642.c
index cda27c2..1ee8506 100644
--- a/sound/soc/codecs/ak4642.c
+++ b/sound/soc/codecs/ak4642.c
@@ -608,9 +608,7 @@
 
 	of_property_read_string(np, "clock-output-names", &clk_name);
 
-	clk = clk_register_fixed_rate(dev, clk_name, parent_clk_name,
-				      (parent_clk_name) ? 0 : CLK_IS_ROOT,
-				      rate);
+	clk = clk_register_fixed_rate(dev, clk_name, parent_clk_name, 0, rate);
 	if (!IS_ERR(clk))
 		of_clk_add_provider(np, of_clk_src_simple_get, clk);
 
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index 8395931..664a8c0 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -221,6 +221,8 @@
 
 	switch (arizona->type) {
 	case WM8997:
+	case CS47L24:
+	case WM1831:
 		break;
 	default:
 		ret = snd_soc_dapm_new_controls(dapm, &arizona_spkr, 1);
@@ -1134,7 +1136,6 @@
 		   int event)
 {
 	struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm);
-	unsigned int mask = 0x3 << w->shift;
 	unsigned int val;
 
 	switch (event) {
@@ -1148,7 +1149,7 @@
 		return 0;
 	}
 
-	snd_soc_update_bits(codec, ARIZONA_CLOCK_CONTROL, mask, val);
+	snd_soc_write(codec, ARIZONA_CLOCK_CONTROL, val);
 
 	return 0;
 }
@@ -2047,7 +2048,21 @@
 			init_ratio, Fref, refdiv);
 
 	while (div <= ARIZONA_FLL_MAX_REFDIV) {
-		for (ratio = init_ratio; ratio <= ARIZONA_FLL_MAX_FRATIO;
+		/* start from init_ratio because this may already give a
+		 * fractional N.K
+		 */
+		for (ratio = init_ratio; ratio > 0; ratio--) {
+			if (target % (ratio * Fref)) {
+				cfg->refdiv = refdiv;
+				cfg->fratio = ratio - 1;
+				arizona_fll_dbg(fll,
+					"pseudo: found fref=%u refdiv=%d(%d) ratio=%d\n",
+					Fref, refdiv, div, ratio);
+				return ratio;
+			}
+		}
+
+		for (ratio = init_ratio + 1; ratio <= ARIZONA_FLL_MAX_FRATIO;
 		     ratio++) {
 			if ((ARIZONA_FLL_VCO_CORNER / 2) /
 			    (fll->vco_mult * ratio) < Fref) {
@@ -2073,17 +2088,6 @@
 			}
 		}
 
-		for (ratio = init_ratio - 1; ratio > 0; ratio--) {
-			if (target % (ratio * Fref)) {
-				cfg->refdiv = refdiv;
-				cfg->fratio = ratio - 1;
-				arizona_fll_dbg(fll,
-					"pseudo: found fref=%u refdiv=%d(%d) ratio=%d\n",
-					Fref, refdiv, div, ratio);
-				return ratio;
-			}
-		}
-
 		div *= 2;
 		Fref /= 2;
 		refdiv++;
diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c
index 7cd5f76..eec1ff8 100644
--- a/sound/soc/codecs/cs42l56.c
+++ b/sound/soc/codecs/cs42l56.c
@@ -56,7 +56,7 @@
 	u8 iface;
 	u8 iface_fmt;
 	u8 iface_inv;
-#if defined(CONFIG_INPUT) || defined(CONFIG_INPUT_MODULE)
+#if IS_ENABLED(CONFIG_INPUT)
 	struct input_dev *beep;
 	struct work_struct beep_work;
 	int beep_rate;
diff --git a/sound/soc/codecs/cs47l24.c b/sound/soc/codecs/cs47l24.c
index 00e9b6fc..5ec5a68 100644
--- a/sound/soc/codecs/cs47l24.c
+++ b/sound/soc/codecs/cs47l24.c
@@ -807,6 +807,9 @@
 	{ "IN2L PGA", NULL, "IN2L" },
 	{ "IN2R PGA", NULL, "IN2R" },
 
+	{ "Audio Trace DSP", NULL, "DSP2" },
+	{ "Audio Trace DSP", NULL, "SYSCLK" },
+
 	ARIZONA_MIXER_ROUTES("OUT1L", "HPOUT1L"),
 	ARIZONA_MIXER_ROUTES("OUT1R", "HPOUT1R"),
 
@@ -1016,6 +1019,27 @@
 			.formats = CS47L24_FORMATS,
 		},
 	},
+	{
+		.name = "cs47l24-cpu-trace",
+		.capture = {
+			.stream_name = "Audio Trace CPU",
+			.channels_min = 1,
+			.channels_max = 6,
+			.rates = CS47L24_RATES,
+			.formats = CS47L24_FORMATS,
+		},
+		.compress_new = snd_soc_new_compress,
+	},
+	{
+		.name = "cs47l24-dsp-trace",
+		.capture = {
+			.stream_name = "Audio Trace DSP",
+			.channels_min = 1,
+			.channels_max = 6,
+			.rates = CS47L24_RATES,
+			.formats = CS47L24_FORMATS,
+		},
+	},
 };
 
 static int cs47l24_open(struct snd_compr_stream *stream)
@@ -1027,6 +1051,8 @@
 
 	if (strcmp(rtd->codec_dai->name, "cs47l24-dsp-voicectrl") == 0) {
 		n_adsp = 2;
+	} else if (strcmp(rtd->codec_dai->name, "cs47l24-dsp-trace") == 0) {
+		n_adsp = 1;
 	} else {
 		dev_err(arizona->dev,
 			"No suitable compressed stream for DAI '%s'\n",
@@ -1041,10 +1067,16 @@
 {
 	struct cs47l24_priv *priv = data;
 	struct arizona *arizona = priv->core.arizona;
-	int ret;
+	int serviced = 0;
+	int i, ret;
 
-	ret = wm_adsp_compr_handle_irq(&priv->core.adsp[2]);
-	if (ret == -ENODEV) {
+	for (i = 1; i <= 2; ++i) {
+		ret = wm_adsp_compr_handle_irq(&priv->core.adsp[i]);
+		if (ret != -ENODEV)
+			serviced++;
+	}
+
+	if (!serviced) {
 		dev_err(arizona->dev, "Spurious compressed data IRQ\n");
 		return IRQ_NONE;
 	}
@@ -1160,6 +1192,7 @@
 static struct snd_soc_platform_driver cs47l24_compr_platform = {
 	.compr_ops = &cs47l24_compr_ops,
 };
+
 static int cs47l24_probe(struct platform_device *pdev)
 {
 	struct arizona *arizona = dev_get_drvdata(pdev->dev.parent);
@@ -1228,9 +1261,9 @@
 		dev_err(&pdev->dev, "Failed to register platform: %d\n", ret);
 		return ret;
 	}
+
 	ret = snd_soc_register_codec(&pdev->dev, &soc_codec_dev_cs47l24,
 				      cs47l24_dai, ARRAY_SIZE(cs47l24_dai));
-
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to register codec: %d\n", ret);
 		snd_soc_unregister_platform(&pdev->dev);
@@ -1241,10 +1274,15 @@
 
 static int cs47l24_remove(struct platform_device *pdev)
 {
+	struct cs47l24_priv *cs47l24 = platform_get_drvdata(pdev);
+
 	snd_soc_unregister_platform(&pdev->dev);
 	snd_soc_unregister_codec(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	wm_adsp2_remove(&cs47l24->core.adsp[1]);
+	wm_adsp2_remove(&cs47l24->core.adsp[2]);
+
 	return 0;
 }
 
diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c
index 7278f93..e5527bc 100644
--- a/sound/soc/codecs/da7213.c
+++ b/sound/soc/codecs/da7213.c
@@ -726,6 +726,68 @@
 
 
 /*
+ * DAPM Events
+ */
+
+static int da7213_dai_event(struct snd_soc_dapm_widget *w,
+			    struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm);
+	struct da7213_priv *da7213 = snd_soc_codec_get_drvdata(codec);
+	u8 pll_ctrl, pll_status;
+	int i = 0;		/* SRM polls that saw no lock */
+	bool srm_lock = false;
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* Enable DAI clks for master mode */
+		if (da7213->master)
+			snd_soc_update_bits(codec, DA7213_DAI_CLK_MODE,
+					    DA7213_DAI_CLK_EN_MASK,
+					    DA7213_DAI_CLK_EN_MASK);
+
+		/* PC synchronised to DAI */
+		snd_soc_update_bits(codec, DA7213_PC_COUNT,
+				    DA7213_PC_FREERUN_MASK, 0);
+
+		/* Slave mode, if SRM not enabled no need for status checks */
+		pll_ctrl = snd_soc_read(codec, DA7213_PLL_CTRL);
+		if (!(pll_ctrl & DA7213_PLL_SRM_EN))
+			return 0;
+
+		/* Check SRM has locked: poll, sleeping 50ms after each miss */
+		do {
+			pll_status = snd_soc_read(codec, DA7213_PLL_STATUS);
+			if (pll_status & DA7219_PLL_SRM_LOCK) {
+				srm_lock = true;
+			} else {
+				++i;
+				msleep(50);
+			}
+		} while ((i < DA7213_SRM_CHECK_RETRIES) && (!srm_lock));
+
+		if (!srm_lock)
+			dev_warn(codec->dev, "SRM failed to lock\n");
+
+		return 0;	/* a missing lock is only warned about */
+	case SND_SOC_DAPM_POST_PMD:
+		/* PC free-running */
+		snd_soc_update_bits(codec, DA7213_PC_COUNT,
+				    DA7213_PC_FREERUN_MASK,
+				    DA7213_PC_FREERUN_MASK);
+
+		/* Disable DAI clks if in master mode */
+		if (da7213->master)
+			snd_soc_update_bits(codec, DA7213_DAI_CLK_MODE,
+					    DA7213_DAI_CLK_EN_MASK, 0);
+		return 0;
+	default:	/* only PRE_PMU and POST_PMD are handled */
+		return -EINVAL;
+	}
+}
+
+
+/*
  * DAPM widgets
  */
 
@@ -736,7 +798,8 @@
 
 	/* Use a supply here as this controls both input & output DAIs */
 	SND_SOC_DAPM_SUPPLY("DAI", DA7213_DAI_CTRL, DA7213_DAI_EN_SHIFT,
-			    DA7213_NO_INVERT, NULL, 0),
+			    DA7213_NO_INVERT, da7213_dai_event,
+			    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
 
 	/*
 	 * Input
@@ -1143,11 +1206,9 @@
 	/* Set master/slave mode */
 	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
 	case SND_SOC_DAIFMT_CBM_CFM:
-		dai_clk_mode |= DA7213_DAI_CLK_EN_MASTER_MODE;
 		da7213->master = true;
 		break;
 	case SND_SOC_DAIFMT_CBS_CFS:
-		dai_clk_mode |= DA7213_DAI_CLK_EN_SLAVE_MODE;
 		da7213->master = false;
 		break;
 	default:
@@ -1281,28 +1342,28 @@
 	pll_ctrl = 0;
 
 	/* Workout input divider based on MCLK rate */
-	if ((da7213->mclk_rate == 32768) && (source == DA7213_SYSCLK_PLL)) {
+	if (da7213->mclk_rate == 32768) {
 		/* 32KHz PLL Mode */
-		indiv_bits = DA7213_PLL_INDIV_10_20_MHZ;
-		indiv = DA7213_PLL_INDIV_10_20_MHZ_VAL;
+		indiv_bits = DA7213_PLL_INDIV_9_TO_18_MHZ;
+		indiv = DA7213_PLL_INDIV_9_TO_18_MHZ_VAL;
 		freq_ref = 3750000;
 		pll_ctrl |= DA7213_PLL_32K_MODE;
 	} else {
 		/* 5 - 54MHz MCLK */
 		if (da7213->mclk_rate < 5000000) {
 			goto pll_err;
-		} else if (da7213->mclk_rate <= 10000000) {
-			indiv_bits = DA7213_PLL_INDIV_5_10_MHZ;
-			indiv = DA7213_PLL_INDIV_5_10_MHZ_VAL;
-		} else if (da7213->mclk_rate <= 20000000) {
-			indiv_bits = DA7213_PLL_INDIV_10_20_MHZ;
-			indiv = DA7213_PLL_INDIV_10_20_MHZ_VAL;
-		} else if (da7213->mclk_rate <= 40000000) {
-			indiv_bits = DA7213_PLL_INDIV_20_40_MHZ;
-			indiv = DA7213_PLL_INDIV_20_40_MHZ_VAL;
+		} else if (da7213->mclk_rate <= 9000000) {
+			indiv_bits = DA7213_PLL_INDIV_5_TO_9_MHZ;
+			indiv = DA7213_PLL_INDIV_5_TO_9_MHZ_VAL;
+		} else if (da7213->mclk_rate <= 18000000) {
+			indiv_bits = DA7213_PLL_INDIV_9_TO_18_MHZ;
+			indiv = DA7213_PLL_INDIV_9_TO_18_MHZ_VAL;
+		} else if (da7213->mclk_rate <= 36000000) {
+			indiv_bits = DA7213_PLL_INDIV_18_TO_36_MHZ;
+			indiv = DA7213_PLL_INDIV_18_TO_36_MHZ_VAL;
 		} else if (da7213->mclk_rate <= 54000000) {
-			indiv_bits = DA7213_PLL_INDIV_40_54_MHZ;
-			indiv = DA7213_PLL_INDIV_40_54_MHZ_VAL;
+			indiv_bits = DA7213_PLL_INDIV_36_TO_54_MHZ;
+			indiv = DA7213_PLL_INDIV_36_TO_54_MHZ_VAL;
 		} else {
 			goto pll_err;
 		}
@@ -1547,6 +1608,10 @@
 	/* Default to using SRM for slave mode */
 	da7213->srm_en = true;
 
+	/* Default PC counter to free-running */
+	snd_soc_update_bits(codec, DA7213_PC_COUNT, DA7213_PC_FREERUN_MASK,
+			    DA7213_PC_FREERUN_MASK);
+
 	/* Enable all Gain Ramps */
 	snd_soc_update_bits(codec, DA7213_AUX_L_CTRL,
 			    DA7213_GAIN_RAMP_EN, DA7213_GAIN_RAMP_EN);
diff --git a/sound/soc/codecs/da7213.h b/sound/soc/codecs/da7213.h
index 030fd69..fbb7a35 100644
--- a/sound/soc/codecs/da7213.h
+++ b/sound/soc/codecs/da7213.h
@@ -142,6 +142,9 @@
  * Bit fields
  */
 
+/* DA7213_PLL_STATUS = 0x03 */
+#define DA7219_PLL_SRM_LOCK					(0x1 << 1)
+
 /* DA7213_SR = 0x22 */
 #define DA7213_SR_8000						(0x1 << 0)
 #define DA7213_SR_11025						(0x2 << 0)
@@ -160,10 +163,10 @@
 #define DA7213_VMID_EN						(0x1 << 7)
 
 /* DA7213_PLL_CTRL = 0x27 */
-#define DA7213_PLL_INDIV_5_10_MHZ				(0x0 << 2)
-#define DA7213_PLL_INDIV_10_20_MHZ				(0x1 << 2)
-#define DA7213_PLL_INDIV_20_40_MHZ				(0x2 << 2)
-#define DA7213_PLL_INDIV_40_54_MHZ				(0x3 << 2)
+#define DA7213_PLL_INDIV_5_TO_9_MHZ				(0x0 << 2)
+#define DA7213_PLL_INDIV_9_TO_18_MHZ				(0x1 << 2)
+#define DA7213_PLL_INDIV_18_TO_36_MHZ				(0x2 << 2)
+#define DA7213_PLL_INDIV_36_TO_54_MHZ				(0x3 << 2)
 #define DA7213_PLL_INDIV_MASK					(0x3 << 2)
 #define DA7213_PLL_MCLK_SQR_EN					(0x1 << 4)
 #define DA7213_PLL_32K_MODE					(0x1 << 5)
@@ -178,8 +181,6 @@
 #define DA7213_DAI_BCLKS_PER_WCLK_MASK				(0x3 << 0)
 #define DA7213_DAI_CLK_POL_INV					(0x1 << 2)
 #define DA7213_DAI_WCLK_POL_INV					(0x1 << 3)
-#define DA7213_DAI_CLK_EN_SLAVE_MODE				(0x0 << 7)
-#define DA7213_DAI_CLK_EN_MASTER_MODE				(0x1 << 7)
 #define DA7213_DAI_CLK_EN_MASK					(0x1 << 7)
 
 /* DA7213_DAI_CTRL = 0x29 */
@@ -412,6 +413,9 @@
 #define DA7213_DMIC_CLK_RATE_SHIFT				2
 #define DA7213_DMIC_CLK_RATE_MASK				(0x1 << 2)
 
+/* DA7213_PC_COUNT = 0x94 */
+#define DA7213_PC_FREERUN_MASK					(0x1 << 0)
+
 /* DA7213_DIG_CTRL = 0x99 */
 #define DA7213_DAC_L_INV_SHIFT					3
 #define DA7213_DAC_R_INV_SHIFT					7
@@ -495,15 +499,16 @@
 #define DA7213_ALC_AVG_ITERATIONS	5
 
 /* PLL related */
-#define DA7213_SYSCLK_MCLK		0
-#define DA7213_SYSCLK_PLL		1
-#define DA7213_PLL_FREQ_OUT_90316800	90316800
-#define DA7213_PLL_FREQ_OUT_98304000	98304000
-#define DA7213_PLL_FREQ_OUT_94310400	94310400
-#define DA7213_PLL_INDIV_5_10_MHZ_VAL	2
-#define DA7213_PLL_INDIV_10_20_MHZ_VAL	4
-#define DA7213_PLL_INDIV_20_40_MHZ_VAL	8
-#define DA7213_PLL_INDIV_40_54_MHZ_VAL	16
+#define DA7213_SYSCLK_MCLK			0
+#define DA7213_SYSCLK_PLL			1
+#define DA7213_PLL_FREQ_OUT_90316800		90316800
+#define DA7213_PLL_FREQ_OUT_98304000		98304000
+#define DA7213_PLL_FREQ_OUT_94310400		94310400
+#define DA7213_PLL_INDIV_5_TO_9_MHZ_VAL		2
+#define DA7213_PLL_INDIV_9_TO_18_MHZ_VAL	4
+#define DA7213_PLL_INDIV_18_TO_36_MHZ_VAL	8
+#define DA7213_PLL_INDIV_36_TO_54_MHZ_VAL	16
+#define DA7213_SRM_CHECK_RETRIES		8
 
 enum da7213_clk_src {
 	DA7213_CLKSRC_MCLK = 0,
diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c
index 93575f2..99ce23e 100644
--- a/sound/soc/codecs/da7218.c
+++ b/sound/soc/codecs/da7218.c
@@ -1868,27 +1868,27 @@
 
 	/* Verify 32KHz, 2MHz - 54MHz MCLK provided, and set input divider */
 	if (da7218->mclk_rate == 32768) {
-		indiv_bits = DA7218_PLL_INDIV_2_5_MHZ;
-		indiv = DA7218_PLL_INDIV_2_10_MHZ_VAL;
+		indiv_bits = DA7218_PLL_INDIV_9_TO_18_MHZ;
+		indiv = DA7218_PLL_INDIV_9_TO_18_MHZ_VAL;
 	} else if (da7218->mclk_rate < 2000000) {
 		dev_err(codec->dev, "PLL input clock %d below valid range\n",
 			da7218->mclk_rate);
 		return -EINVAL;
-	} else if (da7218->mclk_rate <= 5000000) {
-		indiv_bits = DA7218_PLL_INDIV_2_5_MHZ;
-		indiv = DA7218_PLL_INDIV_2_10_MHZ_VAL;
-	} else if (da7218->mclk_rate <= 10000000) {
-		indiv_bits = DA7218_PLL_INDIV_5_10_MHZ;
-		indiv = DA7218_PLL_INDIV_2_10_MHZ_VAL;
-	} else if (da7218->mclk_rate <= 20000000) {
-		indiv_bits = DA7218_PLL_INDIV_10_20_MHZ;
-		indiv = DA7218_PLL_INDIV_10_20_MHZ_VAL;
-	} else if (da7218->mclk_rate <= 40000000) {
-		indiv_bits = DA7218_PLL_INDIV_20_40_MHZ;
-		indiv = DA7218_PLL_INDIV_20_40_MHZ_VAL;
+	} else if (da7218->mclk_rate <= 4500000) {
+		indiv_bits = DA7218_PLL_INDIV_2_TO_4_5_MHZ;
+		indiv = DA7218_PLL_INDIV_2_TO_4_5_MHZ_VAL;
+	} else if (da7218->mclk_rate <= 9000000) {
+		indiv_bits = DA7218_PLL_INDIV_4_5_TO_9_MHZ;
+		indiv = DA7218_PLL_INDIV_4_5_TO_9_MHZ_VAL;
+	} else if (da7218->mclk_rate <= 18000000) {
+		indiv_bits = DA7218_PLL_INDIV_9_TO_18_MHZ;
+		indiv = DA7218_PLL_INDIV_9_TO_18_MHZ_VAL;
+	} else if (da7218->mclk_rate <= 36000000) {
+		indiv_bits = DA7218_PLL_INDIV_18_TO_36_MHZ;
+		indiv = DA7218_PLL_INDIV_18_TO_36_MHZ_VAL;
 	} else if (da7218->mclk_rate <= 54000000) {
-		indiv_bits = DA7218_PLL_INDIV_40_54_MHZ;
-		indiv = DA7218_PLL_INDIV_40_54_MHZ_VAL;
+		indiv_bits = DA7218_PLL_INDIV_36_TO_54_MHZ;
+		indiv = DA7218_PLL_INDIV_36_TO_54_MHZ_VAL;
 	} else {
 		dev_err(codec->dev, "PLL input clock %d above valid range\n",
 			da7218->mclk_rate);
diff --git a/sound/soc/codecs/da7218.h b/sound/soc/codecs/da7218.h
index c2c5904..477cd37 100644
--- a/sound/soc/codecs/da7218.h
+++ b/sound/soc/codecs/da7218.h
@@ -876,15 +876,11 @@
 /* DA7218_PLL_CTRL = 0x91 */
 #define DA7218_PLL_INDIV_SHIFT		0
 #define DA7218_PLL_INDIV_MASK		(0x7 << 0)
-#define DA7218_PLL_INDIV_2_5_MHZ	(0x0 << 0)
-#define DA7218_PLL_INDIV_5_10_MHZ	(0x1 << 0)
-#define DA7218_PLL_INDIV_10_20_MHZ	(0x2 << 0)
-#define DA7218_PLL_INDIV_20_40_MHZ	(0x3 << 0)
-#define DA7218_PLL_INDIV_40_54_MHZ	(0x4 << 0)
-#define DA7218_PLL_INDIV_2_10_MHZ_VAL	2
-#define DA7218_PLL_INDIV_10_20_MHZ_VAL	4
-#define DA7218_PLL_INDIV_20_40_MHZ_VAL	8
-#define DA7218_PLL_INDIV_40_54_MHZ_VAL	16
+#define DA7218_PLL_INDIV_2_TO_4_5_MHZ	(0x0 << 0)
+#define DA7218_PLL_INDIV_4_5_TO_9_MHZ	(0x1 << 0)
+#define DA7218_PLL_INDIV_9_TO_18_MHZ	(0x2 << 0)
+#define DA7218_PLL_INDIV_18_TO_36_MHZ	(0x3 << 0)
+#define DA7218_PLL_INDIV_36_TO_54_MHZ	(0x4 << 0)
 #define DA7218_PLL_MCLK_SQR_EN_SHIFT	4
 #define DA7218_PLL_MCLK_SQR_EN_MASK	(0x1 << 4)
 #define DA7218_PLL_MODE_SHIFT		6
@@ -1336,6 +1332,13 @@
 #define DA7218_PLL_FREQ_OUT_90316	90316800
 #define DA7218_PLL_FREQ_OUT_98304	98304000
 
+/* PLL Frequency Dividers */
+#define DA7218_PLL_INDIV_2_TO_4_5_MHZ_VAL	1
+#define DA7218_PLL_INDIV_4_5_TO_9_MHZ_VAL	2
+#define DA7218_PLL_INDIV_9_TO_18_MHZ_VAL	4
+#define DA7218_PLL_INDIV_18_TO_36_MHZ_VAL	8
+#define DA7218_PLL_INDIV_36_TO_54_MHZ_VAL	16
+
 /* ALC Calibration */
 #define DA7218_ALC_CALIB_DELAY_MIN	2500
 #define DA7218_ALC_CALIB_DELAY_MAX	5000
diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c
index 81c0708..5c93899 100644
--- a/sound/soc/codecs/da7219.c
+++ b/sound/soc/codecs/da7219.c
@@ -11,6 +11,7 @@
  * option) any later version.
  */
 
+#include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/i2c.h>
 #include <linux/of_device.h>
@@ -1025,7 +1026,7 @@
 	if ((da7219->clk_src == clk_id) && (da7219->mclk_rate == freq))
 		return 0;
 
-	if (((freq < 2000000) && (freq != 32768)) || (freq > 54000000)) {
+	if ((freq < 2000000) || (freq > 54000000)) {
 		dev_err(codec_dai->dev, "Unsupported MCLK value %d\n",
 			freq);
 		return -EINVAL;
@@ -1079,21 +1080,21 @@
 		dev_err(codec->dev, "PLL input clock %d below valid range\n",
 			da7219->mclk_rate);
 		return -EINVAL;
-	} else if (da7219->mclk_rate <= 5000000) {
-		indiv_bits = DA7219_PLL_INDIV_2_5_MHZ;
-		indiv = DA7219_PLL_INDIV_2_5_MHZ_VAL;
-	} else if (da7219->mclk_rate <= 10000000) {
-		indiv_bits = DA7219_PLL_INDIV_5_10_MHZ;
-		indiv = DA7219_PLL_INDIV_5_10_MHZ_VAL;
-	} else if (da7219->mclk_rate <= 20000000) {
-		indiv_bits = DA7219_PLL_INDIV_10_20_MHZ;
-		indiv = DA7219_PLL_INDIV_10_20_MHZ_VAL;
-	} else if (da7219->mclk_rate <= 40000000) {
-		indiv_bits = DA7219_PLL_INDIV_20_40_MHZ;
-		indiv = DA7219_PLL_INDIV_20_40_MHZ_VAL;
+	} else if (da7219->mclk_rate <= 4500000) {
+		indiv_bits = DA7219_PLL_INDIV_2_TO_4_5_MHZ;
+		indiv = DA7219_PLL_INDIV_2_TO_4_5_MHZ_VAL;
+	} else if (da7219->mclk_rate <= 9000000) {
+		indiv_bits = DA7219_PLL_INDIV_4_5_TO_9_MHZ;
+		indiv = DA7219_PLL_INDIV_4_5_TO_9_MHZ_VAL;
+	} else if (da7219->mclk_rate <= 18000000) {
+		indiv_bits = DA7219_PLL_INDIV_9_TO_18_MHZ;
+		indiv = DA7219_PLL_INDIV_9_TO_18_MHZ_VAL;
+	} else if (da7219->mclk_rate <= 36000000) {
+		indiv_bits = DA7219_PLL_INDIV_18_TO_36_MHZ;
+		indiv = DA7219_PLL_INDIV_18_TO_36_MHZ_VAL;
 	} else if (da7219->mclk_rate <= 54000000) {
-		indiv_bits = DA7219_PLL_INDIV_40_54_MHZ;
-		indiv = DA7219_PLL_INDIV_40_54_MHZ_VAL;
+		indiv_bits = DA7219_PLL_INDIV_36_TO_54_MHZ;
+		indiv = DA7219_PLL_INDIV_36_TO_54_MHZ_VAL;
 	} else {
 		dev_err(codec->dev, "PLL input clock %d above valid range\n",
 			da7219->mclk_rate);
@@ -1426,6 +1427,12 @@
 };
 MODULE_DEVICE_TABLE(of, da7219_of_match);
 
+static const struct acpi_device_id da7219_acpi_match[] = {
+	{ .id = "DLGS7219", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, da7219_acpi_match);
+
 static enum da7219_micbias_voltage
 	da7219_of_micbias_lvl(struct snd_soc_codec *codec, u32 val)
 {
@@ -1955,6 +1962,7 @@
 	.driver = {
 		.name = "da7219",
 		.of_match_table = of_match_ptr(da7219_of_match),
+		.acpi_match_table = ACPI_PTR(da7219_acpi_match),
 	},
 	.probe		= da7219_i2c_probe,
 	.remove		= da7219_i2c_remove,
diff --git a/sound/soc/codecs/da7219.h b/sound/soc/codecs/da7219.h
index 5a787e7..ff2a2f0 100644
--- a/sound/soc/codecs/da7219.h
+++ b/sound/soc/codecs/da7219.h
@@ -194,11 +194,11 @@
 /* DA7219_PLL_CTRL = 0x20 */
 #define DA7219_PLL_INDIV_SHIFT		2
 #define DA7219_PLL_INDIV_MASK		(0x7 << 2)
-#define DA7219_PLL_INDIV_2_5_MHZ	(0x0 << 2)
-#define DA7219_PLL_INDIV_5_10_MHZ	(0x1 << 2)
-#define DA7219_PLL_INDIV_10_20_MHZ	(0x2 << 2)
-#define DA7219_PLL_INDIV_20_40_MHZ	(0x3 << 2)
-#define DA7219_PLL_INDIV_40_54_MHZ	(0x4 << 2)
+#define DA7219_PLL_INDIV_2_TO_4_5_MHZ	(0x0 << 2)
+#define DA7219_PLL_INDIV_4_5_TO_9_MHZ	(0x1 << 2)
+#define DA7219_PLL_INDIV_9_TO_18_MHZ	(0x2 << 2)
+#define DA7219_PLL_INDIV_18_TO_36_MHZ	(0x3 << 2)
+#define DA7219_PLL_INDIV_36_TO_54_MHZ	(0x4 << 2)
 #define DA7219_PLL_MCLK_SQR_EN_SHIFT	5
 #define DA7219_PLL_MCLK_SQR_EN_MASK	(0x1 << 5)
 #define DA7219_PLL_MODE_SHIFT		6
@@ -761,11 +761,11 @@
 #define DA7219_PLL_FREQ_OUT_98304	98304000
 
 /* PLL Frequency Dividers */
-#define DA7219_PLL_INDIV_2_5_MHZ_VAL	1
-#define DA7219_PLL_INDIV_5_10_MHZ_VAL	2
-#define DA7219_PLL_INDIV_10_20_MHZ_VAL	4
-#define DA7219_PLL_INDIV_20_40_MHZ_VAL	8
-#define DA7219_PLL_INDIV_40_54_MHZ_VAL	16
+#define DA7219_PLL_INDIV_2_TO_4_5_MHZ_VAL	1
+#define DA7219_PLL_INDIV_4_5_TO_9_MHZ_VAL	2
+#define DA7219_PLL_INDIV_9_TO_18_MHZ_VAL	4
+#define DA7219_PLL_INDIV_18_TO_36_MHZ_VAL	8
+#define DA7219_PLL_INDIV_36_TO_54_MHZ_VAL	16
 
 /* SRM */
 #define DA7219_SRM_CHECK_RETRIES	8
diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c
index afa6c5d..2086d71 100644
--- a/sound/soc/codecs/es8328.c
+++ b/sound/soc/codecs/es8328.c
@@ -26,18 +26,30 @@
 #include <sound/tlv.h>
 #include "es8328.h"
 
-#define ES8328_SYSCLK_RATE_1X 11289600
-#define ES8328_SYSCLK_RATE_2X 22579200
+static const unsigned int rates_12288[] = {
+	8000, 12000, 16000, 24000, 32000, 48000, 96000,
+};
 
-/* Run the codec at 22.5792 or 11.2896 MHz to support these rates */
-static struct {
-	int rate;
-	u8 ratio;
-} mclk_ratios[] = {
-	{ 8000, 9 },
-	{11025, 7 },
-	{22050, 4 },
-	{44100, 2 },
+static const int ratios_12288[] = {
+	10, 7, 6, 4, 3, 2, 0,
+};
+
+static const struct snd_pcm_hw_constraint_list constraints_12288 = {
+	.count	= ARRAY_SIZE(rates_12288),
+	.list	= rates_12288,
+};
+
+static const unsigned int rates_11289[] = {
+	8018, 11025, 22050, 44100, 88200,
+};
+
+static const int ratios_11289[] = {
+	9, 7, 4, 2, 0,
+};
+
+static const struct snd_pcm_hw_constraint_list constraints_11289 = {
+	.count	= ARRAY_SIZE(rates_11289),
+	.list	= rates_11289,
 };
 
 /* regulator supplies for sgtl5000, VDDD is an optional external supply */
@@ -57,16 +69,28 @@
 	"HPVDD",
 };
 
-#define ES8328_RATES (SNDRV_PCM_RATE_44100 | \
+#define ES8328_RATES (SNDRV_PCM_RATE_96000 | \
+		SNDRV_PCM_RATE_48000 | \
+		SNDRV_PCM_RATE_44100 | \
+		SNDRV_PCM_RATE_32000 | \
 		SNDRV_PCM_RATE_22050 | \
-		SNDRV_PCM_RATE_11025)
-#define ES8328_FORMATS (SNDRV_PCM_FMTBIT_S16_LE)
+		SNDRV_PCM_RATE_16000 | \
+		SNDRV_PCM_RATE_11025 | \
+		SNDRV_PCM_RATE_8000)
+#define ES8328_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \
+		SNDRV_PCM_FMTBIT_S18_3LE | \
+		SNDRV_PCM_FMTBIT_S20_3LE | \
+		SNDRV_PCM_FMTBIT_S24_LE | \
+		SNDRV_PCM_FMTBIT_S32_LE)
 
 struct es8328_priv {
 	struct regmap *regmap;
 	struct clk *clk;
 	int playback_fs;
 	bool deemph;
+	int mclkdiv2;
+	const struct snd_pcm_hw_constraint_list *sysclk_constraints;
+	const int *mclk_ratios;
 	struct regulator_bulk_data supplies[ES8328_SUPPLY_NUM];
 };
 
@@ -439,54 +463,131 @@
 			mute ? ES8328_DACCONTROL3_DACMUTE : 0);
 }
 
+static int es8328_startup(struct snd_pcm_substream *substream,
+			  struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec);
+
+	if (!es8328->sysclk_constraints)	/* no rate list to apply */
+		return 0;
+
+	return snd_pcm_hw_constraint_list(substream->runtime, 0,
+					  SNDRV_PCM_HW_PARAM_RATE,
+					  es8328->sysclk_constraints);
+}
+
 static int es8328_hw_params(struct snd_pcm_substream *substream,
 	struct snd_pcm_hw_params *params,
 	struct snd_soc_dai *dai)
 {
 	struct snd_soc_codec *codec = dai->codec;
 	struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec);
-	int clk_rate;
 	int i;
 	int reg;
-	u8 ratio;
+	int wl;
+	int ratio;
+
+	if (!es8328->sysclk_constraints) {
+		dev_err(codec->dev, "No MCLK configured\n");
+		return -EINVAL;
+	}
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
 		reg = ES8328_DACCONTROL2;
 	else
 		reg = ES8328_ADCCONTROL5;
 
-	clk_rate = clk_get_rate(es8328->clk);
+	for (i = 0; i < es8328->sysclk_constraints->count; i++)
+		if (es8328->sysclk_constraints->list[i] == params_rate(params))
+			break;
 
-	if ((clk_rate != ES8328_SYSCLK_RATE_1X) &&
-		(clk_rate != ES8328_SYSCLK_RATE_2X)) {
-		dev_err(codec->dev,
-			"%s: clock is running at %d Hz, not %d or %d Hz\n",
-			 __func__, clk_rate,
-			 ES8328_SYSCLK_RATE_1X, ES8328_SYSCLK_RATE_2X);
+	if (i == es8328->sysclk_constraints->count) {
+		dev_err(codec->dev, "LRCLK %d unsupported with current clock\n",
+			params_rate(params));
 		return -EINVAL;
 	}
 
-	/* find master mode MCLK to sampling frequency ratio */
-	ratio = mclk_ratios[0].rate;
-	for (i = 1; i < ARRAY_SIZE(mclk_ratios); i++)
-		if (params_rate(params) <= mclk_ratios[i].rate)
-			ratio = mclk_ratios[i].ratio;
+	ratio = es8328->mclk_ratios[i];
+	snd_soc_update_bits(codec, ES8328_MASTERMODE,
+			ES8328_MASTERMODE_MCLKDIV2,
+			es8328->mclkdiv2 ? ES8328_MASTERMODE_MCLKDIV2 : 0);
 
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		es8328->playback_fs = params_rate(params);
-		es8328_set_deemph(codec);
+	switch (params_width(params)) {
+	case 16:
+		wl = 3;
+		break;
+	case 18:
+		wl = 2;
+		break;
+	case 20:
+		wl = 1;
+		break;
+	case 24:
+		wl = 0;
+		break;
+	case 32:
+		wl = 4;
+		break;
+	default:
+		return -EINVAL;
 	}
 
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		snd_soc_update_bits(codec, ES8328_DACCONTROL1,
+				ES8328_DACCONTROL1_DACWL_MASK,
+				wl << ES8328_DACCONTROL1_DACWL_SHIFT);
+
+		es8328->playback_fs = params_rate(params);
+		es8328_set_deemph(codec);
+	} else
+		snd_soc_update_bits(codec, ES8328_ADCCONTROL4,
+				ES8328_ADCCONTROL4_ADCWL_MASK,
+				wl << ES8328_ADCCONTROL4_ADCWL_SHIFT);
+
 	return snd_soc_update_bits(codec, reg, ES8328_RATEMASK, ratio);
 }
 
+static int es8328_set_sysclk(struct snd_soc_dai *codec_dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec);
+	int mclkdiv2 = 0;	/* set to 1 for 22579200 and 24576000 */
+
+	switch (freq) {
+	case 0:		/* freq 0 clears the rate list and ratio table */
+		es8328->sysclk_constraints = NULL;
+		es8328->mclk_ratios = NULL;
+		break;
+	case 22579200:
+		mclkdiv2 = 1;
+		/* fallthru: shares the 11289600 rate list and ratios */
+	case 11289600:
+		es8328->sysclk_constraints = &constraints_11289;
+		es8328->mclk_ratios = ratios_11289;
+		break;
+	case 24576000:
+		mclkdiv2 = 1;
+		/* fallthru: shares the 12288000 rate list and ratios */
+	case 12288000:
+		es8328->sysclk_constraints = &constraints_12288;
+		es8328->mclk_ratios = ratios_12288;
+		break;
+	default:	/* any other freq: reject, stored state untouched */
+		return -EINVAL;
+	}
+
+	es8328->mclkdiv2 = mclkdiv2;	/* read back in es8328_hw_params() */
+	return 0;
+}
+
 static int es8328_set_dai_fmt(struct snd_soc_dai *codec_dai,
 		unsigned int fmt)
 {
 	struct snd_soc_codec *codec = codec_dai->codec;
-	struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec);
-	int clk_rate;
-	u8 mode = ES8328_DACCONTROL1_DACWL_16;
+	u8 dac_mode = 0;
+	u8 adc_mode = 0;
 
 	/* set master/slave audio interface */
 	if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) != SND_SOC_DAIFMT_CBM_CFM)
@@ -495,13 +596,16 @@
 	/* interface format */
 	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
 	case SND_SOC_DAIFMT_I2S:
-		mode |= ES8328_DACCONTROL1_DACFORMAT_I2S;
+		dac_mode |= ES8328_DACCONTROL1_DACFORMAT_I2S;
+		adc_mode |= ES8328_ADCCONTROL4_ADCFORMAT_I2S;
 		break;
 	case SND_SOC_DAIFMT_RIGHT_J:
-		mode |= ES8328_DACCONTROL1_DACFORMAT_RJUST;
+		dac_mode |= ES8328_DACCONTROL1_DACFORMAT_RJUST;
+		adc_mode |= ES8328_ADCCONTROL4_ADCFORMAT_RJUST;
 		break;
 	case SND_SOC_DAIFMT_LEFT_J:
-		mode |= ES8328_DACCONTROL1_DACFORMAT_LJUST;
+		dac_mode |= ES8328_DACCONTROL1_DACFORMAT_LJUST;
+		adc_mode |= ES8328_ADCCONTROL4_ADCFORMAT_LJUST;
 		break;
 	default:
 		return -EINVAL;
@@ -511,18 +615,14 @@
 	if ((fmt & SND_SOC_DAIFMT_INV_MASK) != SND_SOC_DAIFMT_NB_NF)
 		return -EINVAL;
 
-	snd_soc_write(codec, ES8328_DACCONTROL1, mode);
-	snd_soc_write(codec, ES8328_ADCCONTROL4, mode);
+	snd_soc_update_bits(codec, ES8328_DACCONTROL1,
+			ES8328_DACCONTROL1_DACFORMAT_MASK, dac_mode);
+	snd_soc_update_bits(codec, ES8328_ADCCONTROL4,
+			ES8328_ADCCONTROL4_ADCFORMAT_MASK, adc_mode);
 
 	/* Master serial port mode, with BCLK generated automatically */
-	clk_rate = clk_get_rate(es8328->clk);
-	if (clk_rate == ES8328_SYSCLK_RATE_1X)
-		snd_soc_write(codec, ES8328_MASTERMODE,
-				ES8328_MASTERMODE_MSC);
-	else
-		snd_soc_write(codec, ES8328_MASTERMODE,
-				ES8328_MASTERMODE_MCLKDIV2 |
-				ES8328_MASTERMODE_MSC);
+	snd_soc_update_bits(codec, ES8328_MASTERMODE,
+			ES8328_MASTERMODE_MSC, ES8328_MASTERMODE_MSC);
 
 	return 0;
 }
@@ -579,8 +679,10 @@
 }
 
 static const struct snd_soc_dai_ops es8328_dai_ops = {
+	.startup	= es8328_startup,
 	.hw_params	= es8328_hw_params,
 	.digital_mute	= es8328_mute,
+	.set_sysclk	= es8328_set_sysclk,
 	.set_fmt	= es8328_set_dai_fmt,
 };
 
@@ -601,6 +703,7 @@
 		.formats = ES8328_FORMATS,
 	},
 	.ops = &es8328_dai_ops,
+	.symmetric_rates = 1,
 };
 
 static int es8328_suspend(struct snd_soc_codec *codec)
@@ -708,6 +811,7 @@
 	.val_bits	= 8,
 	.max_register	= ES8328_REG_MAX,
 	.cache_type	= REGCACHE_RBTREE,
+	.use_single_rw	= true,
 };
 EXPORT_SYMBOL_GPL(es8328_regmap_config);
 
diff --git a/sound/soc/codecs/es8328.h b/sound/soc/codecs/es8328.h
index 156c748..1a736e7 100644
--- a/sound/soc/codecs/es8328.h
+++ b/sound/soc/codecs/es8328.h
@@ -22,7 +22,7 @@
 #define ES8328_CONTROL1_VMIDSEL_50k (1 << 0)
 #define ES8328_CONTROL1_VMIDSEL_500k (2 << 0)
 #define ES8328_CONTROL1_VMIDSEL_5k (3 << 0)
-#define ES8328_CONTROL1_VMIDSEL_MASK (7 << 0)
+#define ES8328_CONTROL1_VMIDSEL_MASK (3 << 0)
 #define ES8328_CONTROL1_ENREF (1 << 2)
 #define ES8328_CONTROL1_SEQEN (1 << 3)
 #define ES8328_CONTROL1_SAMEFS (1 << 4)
@@ -84,7 +84,20 @@
 #define ES8328_ADCCONTROL1	0x09
 #define ES8328_ADCCONTROL2	0x0a
 #define ES8328_ADCCONTROL3	0x0b
+
 #define ES8328_ADCCONTROL4	0x0c
+#define ES8328_ADCCONTROL4_ADCFORMAT_MASK (3 << 0)
+#define ES8328_ADCCONTROL4_ADCFORMAT_I2S (0 << 0)
+#define ES8328_ADCCONTROL4_ADCFORMAT_LJUST (1 << 0)
+#define ES8328_ADCCONTROL4_ADCFORMAT_RJUST (2 << 0)
+#define ES8328_ADCCONTROL4_ADCFORMAT_PCM (3 << 0)
+#define ES8328_ADCCONTROL4_ADCWL_SHIFT 2
+#define ES8328_ADCCONTROL4_ADCWL_MASK (7 << 2)
+#define ES8328_ADCCONTROL4_ADCLRP_I2S_POL_NORMAL (0 << 5)
+#define ES8328_ADCCONTROL4_ADCLRP_I2S_POL_INV (1 << 5)
+#define ES8328_ADCCONTROL4_ADCLRP_PCM_MSB_CLK2 (0 << 5)
+#define ES8328_ADCCONTROL4_ADCLRP_PCM_MSB_CLK1 (1 << 5)
+
 #define ES8328_ADCCONTROL5	0x0d
 #define ES8328_ADCCONTROL5_RATEMASK (0x1f << 0)
 
@@ -109,15 +122,13 @@
 #define ES8328_ADCCONTROL14	0x16
 
 #define ES8328_DACCONTROL1	0x17
+#define ES8328_DACCONTROL1_DACFORMAT_MASK (3 << 1)
 #define ES8328_DACCONTROL1_DACFORMAT_I2S (0 << 1)
 #define ES8328_DACCONTROL1_DACFORMAT_LJUST (1 << 1)
 #define ES8328_DACCONTROL1_DACFORMAT_RJUST (2 << 1)
 #define ES8328_DACCONTROL1_DACFORMAT_PCM (3 << 1)
-#define ES8328_DACCONTROL1_DACWL_24 (0 << 3)
-#define ES8328_DACCONTROL1_DACWL_20 (1 << 3)
-#define ES8328_DACCONTROL1_DACWL_18 (2 << 3)
-#define ES8328_DACCONTROL1_DACWL_16 (3 << 3)
-#define ES8328_DACCONTROL1_DACWL_32 (4 << 3)
+#define ES8328_DACCONTROL1_DACWL_SHIFT 3
+#define ES8328_DACCONTROL1_DACWL_MASK (7 << 3)
 #define ES8328_DACCONTROL1_DACLRP_I2S_POL_NORMAL (0 << 6)
 #define ES8328_DACCONTROL1_DACLRP_I2S_POL_INV (1 << 6)
 #define ES8328_DACCONTROL1_DACLRP_PCM_MSB_CLK2 (0 << 6)
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index aaa038f..181cd3b 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -29,6 +29,7 @@
 #include <sound/hdaudio_ext.h>
 #include <sound/hda_i915.h>
 #include <sound/pcm_drm_eld.h>
+#include <sound/hda_chmap.h>
 #include "../../hda/local.h"
 #include "hdac_hdmi.h"
 
@@ -60,11 +61,17 @@
 	struct hdac_hdmi_cvt_params params;
 };
 
+/* Currently only spk_alloc, more to be added */
+struct hdac_hdmi_parsed_eld {
+	u8 spk_alloc;
+};
+
 struct hdac_hdmi_eld {
 	bool	monitor_present;
 	bool	eld_valid;
 	int	eld_size;
 	char    eld_buffer[ELD_MAX_SIZE];
+	struct	hdac_hdmi_parsed_eld info;
 };
 
 struct hdac_hdmi_pin {
@@ -76,6 +83,10 @@
 	struct hdac_ext_device *edev;
 	int repoll_count;
 	struct delayed_work work;
+	struct mutex lock;
+	bool chmap_set;
+	unsigned char chmap[8]; /* ALSA API channel-map */
+	int channels; /* current number of channels */
 };
 
 struct hdac_hdmi_pcm {
@@ -100,8 +111,22 @@
 	int num_pin;
 	int num_cvt;
 	struct mutex pin_mutex;
+	struct hdac_chmap chmap;
 };
 
+static struct hdac_hdmi_pcm *get_hdmi_pcm_from_id(struct hdac_hdmi_priv *hdmi,
+						int pcm_idx)
+{
+	struct hdac_hdmi_pcm *pcm;
+
+	list_for_each_entry(pcm, &hdmi->pcm_list, head) {
+		if (pcm->pcm_id == pcm_idx)
+			return pcm;	/* first match wins */
+	}
+
+	return NULL;	/* no entry in pcm_list has this pcm_id */
+}
+
 static inline struct hdac_ext_device *to_hda_ext_device(struct device *dev)
 {
 	struct hdac_device *hdac = dev_to_hdac_dev(dev);
@@ -278,26 +303,31 @@
 	int i;
 	const u8 *eld_buf;
 	u8 conn_type;
-	int channels = 2;
+	int channels, ca;
 
 	list_for_each_entry(pin, &hdmi->pin_list, head) {
 		if (pin->nid == pin_nid)
 			break;
 	}
 
+	ca = snd_hdac_channel_allocation(&hdac->hdac, pin->eld.info.spk_alloc,
+			pin->channels, pin->chmap_set, true, pin->chmap);
+
+	channels = snd_hdac_get_active_channels(ca);
+	hdmi->chmap.ops.set_channel_count(&hdac->hdac, cvt_nid, channels);
+
+	snd_hdac_setup_channel_mapping(&hdmi->chmap, pin->nid, false, ca,
+				pin->channels, pin->chmap, pin->chmap_set);
+
 	eld_buf = pin->eld.eld_buffer;
 	conn_type = drm_eld_get_conn_type(eld_buf);
 
-	/* setup channel count */
-	snd_hdac_codec_write(&hdac->hdac, cvt_nid, 0,
-			    AC_VERB_SET_CVT_CHAN_COUNT, channels - 1);
-
 	switch (conn_type) {
 	case DRM_ELD_CONN_TYPE_HDMI:
 		hdmi_audio_infoframe_init(&frame);
 
-		/* Default stereo for now */
 		frame.channels = channels;
+		frame.channel_allocation = ca;
 
 		ret = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer));
 		if (ret < 0)
@@ -311,7 +341,7 @@
 		dp_ai.len	= 0x1b;
 		dp_ai.ver	= 0x11 << 2;
 		dp_ai.CC02_CT47	= channels - 1;
-		dp_ai.CA	= 0;
+		dp_ai.CA	= ca;
 
 		dip = (u8 *)&dp_ai;
 		break;
@@ -370,17 +400,23 @@
 	struct hdac_ext_device *hdac = snd_soc_dai_get_drvdata(dai);
 	struct hdac_hdmi_priv *hdmi = hdac->private_data;
 	struct hdac_hdmi_dai_pin_map *dai_map;
+	struct hdac_hdmi_pin *pin;
 	struct hdac_ext_dma_params *dd;
 	int ret;
 
 	dai_map = &hdmi->dai_map[dai->id];
+	pin = dai_map->pin;
 
 	dd = (struct hdac_ext_dma_params *)snd_soc_dai_get_dma_data(dai, substream);
 	dev_dbg(&hdac->hdac.dev, "stream tag from cpu dai %d format in cvt 0x%x\n",
 			dd->stream_tag,	dd->format);
 
+	mutex_lock(&pin->lock);
+	pin->channels = substream->runtime->channels;
+
 	ret = hdac_hdmi_setup_audio_infoframe(hdac, dai_map->cvt->nid,
 						dai_map->pin->nid);
+	mutex_unlock(&pin->lock);
 	if (ret < 0)
 		return ret;
 
@@ -640,6 +676,12 @@
 		snd_hdac_codec_write(&hdac->hdac, dai_map->pin->nid, 0,
 			AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
 
+		mutex_lock(&dai_map->pin->lock);
+		dai_map->pin->chmap_set = false;
+		memset(dai_map->pin->chmap, 0, sizeof(dai_map->pin->chmap));
+		dai_map->pin->channels = 0;
+		mutex_unlock(&dai_map->pin->lock);
+
 		dai_map->pin = NULL;
 	}
 }
@@ -647,10 +689,19 @@
 static int
 hdac_hdmi_query_cvt_params(struct hdac_device *hdac, struct hdac_hdmi_cvt *cvt)
 {
+	unsigned int chans;
+	struct hdac_ext_device *edev = to_ehdac_device(hdac);
+	struct hdac_hdmi_priv *hdmi = edev->private_data;
 	int err;
 
-	/* Only stereo supported as of now */
-	cvt->params.channels_min = cvt->params.channels_max = 2;
+	chans = get_wcaps(hdac, cvt->nid);
+	chans = get_wcaps_channels(chans);
+
+	cvt->params.channels_min = 2;
+
+	cvt->params.channels_max = chans;
+	if (chans > hdmi->chmap.channels_max)
+		hdmi->chmap.channels_max = chans;
 
 	err = snd_hdac_query_supported_pcm(hdac, cvt->nid,
 			&cvt->params.rates,
@@ -1008,6 +1059,12 @@
 	return hdac_hdmi_query_cvt_params(&edev->hdac, cvt);
 }
 
+static void hdac_hdmi_parse_eld(struct hdac_ext_device *edev,
+			struct hdac_hdmi_pin *pin)
+{
+	pin->eld.info.spk_alloc = pin->eld.eld_buffer[DRM_ELD_SPEAKER];
+}
+
 static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin, int repoll)
 {
 	struct hdac_ext_device *edev = pin->edev;
@@ -1065,6 +1122,7 @@
 
 				snd_jack_report(pcm->jack, SND_JACK_AVOUT);
 			}
+			hdac_hdmi_parse_eld(edev, pin);
 
 			print_hex_dump_bytes("ELD: ", DUMP_PREFIX_OFFSET,
 					pin->eld.eld_buffer, pin->eld.eld_size);
@@ -1123,6 +1181,7 @@
 	hdmi->num_pin++;
 
 	pin->edev = edev;
+	mutex_init(&pin->lock);
 	INIT_DELAYED_WORK(&pin->work, hdac_hdmi_repoll_eld);
 
 	return 0;
@@ -1342,6 +1401,19 @@
 	.pin_eld_notify	= hdac_hdmi_eld_notify_cb,
 };
 
+/* Return the snd_pcm on @card whose device number is @device, else NULL. */
+static struct snd_pcm *hdac_hdmi_get_pcm_from_id(struct snd_soc_card *card,
+						int device)
+{
+	struct snd_soc_pcm_runtime *rt;
+
+	list_for_each_entry(rt, &card->rtd_list, list)
+		if (rt->pcm && rt->pcm->device == device)
+			return rt->pcm;
+
+	return NULL;
+}
+
 int hdac_hdmi_jack_init(struct snd_soc_dai *dai, int device)
 {
 	char jack_name[NAME_SIZE];
@@ -1351,6 +1423,8 @@
 		snd_soc_component_get_dapm(&codec->component);
 	struct hdac_hdmi_priv *hdmi = edev->private_data;
 	struct hdac_hdmi_pcm *pcm;
+	struct snd_pcm *snd_pcm;
+	int err;
 
 	/*
 	 * this is a new PCM device, create new pcm and
@@ -1362,6 +1436,18 @@
 	pcm->pcm_id = device;
 	pcm->cvt = hdmi->dai_map[dai->id].cvt;
 
+	snd_pcm = hdac_hdmi_get_pcm_from_id(dai->component->card, device);
+	if (snd_pcm) {
+		err = snd_hdac_add_chmap_ctls(snd_pcm, device, &hdmi->chmap);
+		if (err < 0) {
+			dev_err(&edev->hdac.dev,
+				"chmap control add failed with err: %d for pcm: %d\n",
+				err, device);
+			kfree(pcm);
+			return err;
+		}
+	}
+
 	list_add_tail(&pcm->head, &hdmi->pcm_list);
 
 	sprintf(jack_name, "HDMI/DP, pcm=%d Jack", device);
@@ -1378,10 +1464,18 @@
 	struct snd_soc_dapm_context *dapm =
 		snd_soc_component_get_dapm(&codec->component);
 	struct hdac_hdmi_pin *pin;
+	struct hdac_ext_link *hlink = NULL;
 	int ret;
 
 	edev->scodec = codec;
 
+	/*
+	 * hold the ref while we probe, also no need to drop the ref on
+	 * exit, we call pm_runtime_suspend() so that will do for us
+	 */
+	hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev));
+	snd_hdac_ext_bus_link_get(edev->ebus, hlink);
+
 	ret = create_fill_widget_route_map(dapm);
 	if (ret < 0)
 		return ret;
@@ -1475,19 +1569,83 @@
 	.idle_bias_off	= true,
 };
 
+/* chmap op: copy out the channel map of the pin attached to PCM @pcm_idx. */
+static void hdac_hdmi_get_chmap(struct hdac_device *hdac, int pcm_idx,
+					unsigned char *chmap)
+{
+	struct hdac_ext_device *edev = to_ehdac_device(hdac);
+	struct hdac_hdmi_priv *priv = edev->private_data;
+	struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(priv, pcm_idx);
+	struct hdac_hdmi_pin *attached = pcm->pin;
+
+	/* no pin attached: leave the chmap as the caller zeroed it */
+	if (!attached)
+		return;
+	memcpy(chmap, attached->chmap, ARRAY_SIZE(attached->chmap));
+}
+
+static void hdac_hdmi_set_chmap(struct hdac_device *hdac, int pcm_idx,
+				unsigned char *chmap, int prepared)
+{
+	struct hdac_ext_device *edev = to_ehdac_device(hdac);
+	struct hdac_hdmi_priv *hdmi = edev->private_data;
+	struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx);
+	struct hdac_hdmi_pin *pin = pcm->pin;
+
+	mutex_lock(&pin->lock);	/* NOTE(review): pin is not NULL-checked */
+	pin->chmap_set = true;
+	memcpy(pin->chmap, chmap, ARRAY_SIZE(pin->chmap));
+	if (prepared)	/* re-run the infoframe setup with the new map */
+		hdac_hdmi_setup_audio_infoframe(edev, pcm->cvt->nid, pin->nid);
+	mutex_unlock(&pin->lock);
+}
+
+/* chmap op: true when PCM @pcm_idx currently has a pin attached to it. */
+static bool is_hdac_hdmi_pcm_attached(struct hdac_device *hdac, int pcm_idx)
+{
+	struct hdac_ext_device *edev = to_ehdac_device(hdac);
+	struct hdac_hdmi_priv *priv = edev->private_data;
+	struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(priv, pcm_idx);
+
+	return pcm->pin != NULL;
+}
+
+/* chmap op: speaker allocation parsed from the ELD of PCM @pcm_idx's pin. */
+static int hdac_hdmi_get_spk_alloc(struct hdac_device *hdac, int pcm_idx)
+{
+	struct hdac_ext_device *edev = to_ehdac_device(hdac);
+	struct hdac_hdmi_priv *priv = edev->private_data;
+	struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(priv, pcm_idx);
+	struct hdac_hdmi_pin *attached = pcm->pin;
+
+	/* 0 when no pin is attached or the pin's ELD is not marked valid */
+	return (attached && attached->eld.eld_valid) ?
+		attached->eld.info.spk_alloc : 0;
+}
+
 static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
 {
 	struct hdac_device *codec = &edev->hdac;
 	struct hdac_hdmi_priv *hdmi_priv;
 	struct snd_soc_dai_driver *hdmi_dais = NULL;
+	struct hdac_ext_link *hlink = NULL;
 	int num_dais = 0;
 	int ret = 0;
 
+	/* hold the ref while we probe */
+	hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev));
+	snd_hdac_ext_bus_link_get(edev->ebus, hlink);
+
 	hdmi_priv = devm_kzalloc(&codec->dev, sizeof(*hdmi_priv), GFP_KERNEL);
 	if (hdmi_priv == NULL)
 		return -ENOMEM;
 
 	edev->private_data = hdmi_priv;
+	snd_hdac_register_chmap_ops(codec, &hdmi_priv->chmap);
+	hdmi_priv->chmap.ops.get_chmap = hdac_hdmi_get_chmap;
+	hdmi_priv->chmap.ops.set_chmap = hdac_hdmi_set_chmap;
+	hdmi_priv->chmap.ops.is_pcm_attached = is_hdac_hdmi_pcm_attached;
+	hdmi_priv->chmap.ops.get_spk_alloc = hdac_hdmi_get_spk_alloc;
 
 	dev_set_drvdata(&codec->dev, edev);
 
@@ -1516,8 +1674,12 @@
 	}
 
 	/* ASoC specific initialization */
-	return snd_soc_register_codec(&codec->dev, &hdmi_hda_codec,
-			hdmi_dais, num_dais);
+	ret = snd_soc_register_codec(&codec->dev, &hdmi_hda_codec,
+					hdmi_dais, num_dais);
+
+	snd_hdac_ext_bus_link_put(edev->ebus, hlink);
+
+	return ret;
 }
 
 static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev)
@@ -1556,6 +1718,8 @@
 	struct hdac_ext_device *edev = to_hda_ext_device(dev);
 	struct hdac_device *hdac = &edev->hdac;
 	struct hdac_bus *bus = hdac->bus;
+	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
+	struct hdac_ext_link *hlink = NULL;
 	int err;
 
 	dev_dbg(dev, "Enter: %s\n", __func__);
@@ -1579,6 +1743,9 @@
 		return err;
 	}
 
+	hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+	snd_hdac_ext_bus_link_put(ebus, hlink);
+
 	return 0;
 }
 
@@ -1587,6 +1754,8 @@
 	struct hdac_ext_device *edev = to_hda_ext_device(dev);
 	struct hdac_device *hdac = &edev->hdac;
 	struct hdac_bus *bus = hdac->bus;
+	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
+	struct hdac_ext_link *hlink = NULL;
 	int err;
 
 	dev_dbg(dev, "Enter: %s\n", __func__);
@@ -1595,6 +1764,9 @@
 	if (!bus)
 		return 0;
 
+	hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+	snd_hdac_ext_bus_link_get(ebus, hlink);
+
 	err = snd_hdac_display_power(bus, true);
 	if (err < 0) {
 		dev_err(bus->dev, "Cannot turn on display power on i915\n");
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
new file mode 100644
index 0000000..8e36e88
--- /dev/null
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -0,0 +1,432 @@
+/*
+ * ALSA SoC codec for HDMI encoder drivers
+ * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/
+ * Author: Jyri Sarha <jsarha@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/string.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/pcm_drm_eld.h>
+#include <sound/hdmi-codec.h>
+#include <sound/pcm_iec958.h>
+
+#include <drm/drm_crtc.h> /* This is only to get MAX_ELD_BYTES */
+
+struct hdmi_codec_priv {
+	struct hdmi_codec_pdata hcd;
+	struct snd_soc_dai_driver *daidrv;
+	struct hdmi_codec_daifmt daifmt[2];
+	struct mutex current_stream_lock;
+	struct snd_pcm_substream *current_stream;
+	struct snd_pcm_hw_constraint_list ratec;
+	uint8_t eld[MAX_ELD_BYTES];
+};
+
+static const struct snd_soc_dapm_widget hdmi_widgets[] = {
+	SND_SOC_DAPM_OUTPUT("TX"),
+};
+
+static const struct snd_soc_dapm_route hdmi_routes[] = {
+	{ "TX", NULL, "Playback" },
+};
+
+enum {
+	DAI_ID_I2S = 0,
+	DAI_ID_SPDIF,
+};
+
+/* .info of the "ELD" control: a byte array as large as the ELD cache. */
+static int hdmi_eld_ctl_info(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_info *uinfo)
+{
+	struct hdmi_codec_priv *hcp =
+		snd_soc_component_get_drvdata(snd_kcontrol_chip(kcontrol));
+
+	uinfo->count = sizeof(hcp->eld);
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES;
+	return 0;
+}
+
+/* .get of the "ELD" control: copy the whole cached ELD into @ucontrol. */
+static int hdmi_eld_ctl_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct hdmi_codec_priv *hcp =
+		snd_soc_component_get_drvdata(snd_kcontrol_chip(kcontrol));
+
+	memcpy(ucontrol->value.bytes.data, hcp->eld, sizeof(hcp->eld));
+	return 0;
+}
+
+static const struct snd_kcontrol_new hdmi_controls[] = {
+	{
+		.access = SNDRV_CTL_ELEM_ACCESS_READ |
+			  SNDRV_CTL_ELEM_ACCESS_VOLATILE,
+		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
+		.name = "ELD",
+		.info = hdmi_eld_ctl_info,
+		.get = hdmi_eld_ctl_get,
+	},
+};
+
+/* Claim the one stream slot for @substream; -EINVAL if another owns it. */
+static int hdmi_codec_new_stream(struct snd_pcm_substream *substream,
+				 struct snd_soc_dai *dai)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+	int err = 0;
+
+	mutex_lock(&hcp->current_stream_lock);
+	if (hcp->current_stream && hcp->current_stream != substream) {
+		dev_err(dai->dev, "Only one simultaneous stream supported!\n");
+		err = -EINVAL;
+	} else {
+		hcp->current_stream = substream;
+	}
+	mutex_unlock(&hcp->current_stream_lock);
+	return err;
+}
+
+/*
+ * DAI .startup: claim the single stream slot, call the optional
+ * audio_startup op and, when get_eld succeeds, constrain the runtime to
+ * the ELD. Any failure after the claim gives the slot back.
+ */
+static int hdmi_codec_startup(struct snd_pcm_substream *substream,
+			      struct snd_soc_dai *dai)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+	int ret;
+
+	dev_dbg(dai->dev, "%s()\n", __func__);
+
+	ret = hdmi_codec_new_stream(substream, dai);
+	if (ret)
+		return ret;
+
+	if (hcp->hcd.ops->audio_startup)
+		ret = hcp->hcd.ops->audio_startup(dai->dev->parent);
+
+	/* get_eld failing is tolerated: the ELD constraint is skipped */
+	if (!ret && hcp->hcd.ops->get_eld &&
+	    !hcp->hcd.ops->get_eld(dai->dev->parent, hcp->eld,
+				   sizeof(hcp->eld)))
+		ret = snd_pcm_hw_constraint_eld(substream->runtime, hcp->eld);
+
+	if (ret) {
+		/* or every later open fails in hdmi_codec_new_stream() */
+		mutex_lock(&hcp->current_stream_lock);
+		hcp->current_stream = NULL;
+		mutex_unlock(&hcp->current_stream_lock);
+	}
+
+	return ret;
+}
+
+static void hdmi_codec_shutdown(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+
+	dev_dbg(dai->dev, "%s()\n", __func__);
+
+	WARN_ON(hcp->current_stream != substream);
+	/* never NULL: hdmi_codec_probe() insists on an audio_shutdown op */
+	hcp->hcd.ops->audio_shutdown(dai->dev->parent);
+	/* give the slot back so hdmi_codec_new_stream() can claim it again */
+	mutex_lock(&hcp->current_stream_lock);
+	hcp->current_stream = NULL;
+	mutex_unlock(&hcp->current_stream_lock);
+}
+
+static int hdmi_codec_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+	struct hdmi_codec_params hp = {
+		.iec = {
+			.status = { 0 },
+			.subcode = { 0 },
+			.pad = 0,
+			.dig_subframe = { 0 },
+		}
+	};
+	int ret;
+
+	dev_dbg(dai->dev, "%s() width %d rate %d channels %d\n", __func__,
+		params_width(params), params_rate(params),
+		params_channels(params));
+
+	if (params_width(params) > 24)
+		params->msbits = 24;
+	/* build the IEC958 channel status for these params into hp.iec.status */
+	ret = snd_pcm_create_iec958_consumer_hw_params(params, hp.iec.status,
+						       sizeof(hp.iec.status));
+	if (ret < 0) {
+		dev_err(dai->dev, "Creating IEC958 channel status failed %d\n",
+			ret);
+		return ret;
+	}
+	/* fails with -EINVAL if a different substream holds the stream slot */
+	ret = hdmi_codec_new_stream(substream, dai);
+	if (ret)
+		return ret;
+
+	hdmi_audio_infoframe_init(&hp.cea);
+	hp.cea.channels = params_channels(params);
+	hp.cea.coding_type = HDMI_AUDIO_CODING_TYPE_STREAM;
+	hp.cea.sample_size = HDMI_AUDIO_SAMPLE_SIZE_STREAM;
+	hp.cea.sample_frequency = HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM;
+
+	hp.sample_width = params_width(params);
+	hp.sample_rate = params_rate(params);
+	hp.channels = params_channels(params);
+	/* pass on the format hdmi_codec_set_fmt() stored for this DAI id */
+	return hcp->hcd.ops->hw_params(dai->dev->parent, &hcp->daifmt[dai->id],
+				       &hp);
+}
+
+static int hdmi_codec_set_fmt(struct snd_soc_dai *dai,
+			      unsigned int fmt)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+	struct hdmi_codec_daifmt cf = { 0 };
+	int ret = 0;
+
+	dev_dbg(dai->dev, "%s()\n", __func__);
+
+	if (dai->id == DAI_ID_SPDIF) {
+		cf.fmt = HDMI_SPDIF;
+	} else {
+		switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+		case SND_SOC_DAIFMT_CBM_CFM:
+			cf.bit_clk_master = 1;
+			cf.frame_clk_master = 1;
+			break;
+		case SND_SOC_DAIFMT_CBS_CFM:
+			cf.frame_clk_master = 1;
+			break;
+		case SND_SOC_DAIFMT_CBM_CFS:
+			cf.bit_clk_master = 1;
+			break;
+		case SND_SOC_DAIFMT_CBS_CFS:
+			break;
+		default:
+			return -EINVAL;
+		}
+		/* no default: other inversion values leave both flags clear */
+		switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+		case SND_SOC_DAIFMT_NB_NF:
+			break;
+		case SND_SOC_DAIFMT_NB_IF:
+			cf.frame_clk_inv = 1;
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			cf.bit_clk_inv = 1;
+			break;
+		case SND_SOC_DAIFMT_IB_IF:
+			cf.frame_clk_inv = 1;
+			cf.bit_clk_inv = 1;
+			break;
+		}
+		/* unlike the inversion bits, an unknown format is rejected */
+		switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+		case SND_SOC_DAIFMT_I2S:
+			cf.fmt = HDMI_I2S;
+			break;
+		case SND_SOC_DAIFMT_DSP_A:
+			cf.fmt = HDMI_DSP_A;
+			break;
+		case SND_SOC_DAIFMT_DSP_B:
+			cf.fmt = HDMI_DSP_B;
+			break;
+		case SND_SOC_DAIFMT_RIGHT_J:
+			cf.fmt = HDMI_RIGHT_J;
+			break;
+		case SND_SOC_DAIFMT_LEFT_J:
+			cf.fmt = HDMI_LEFT_J;
+			break;
+		case SND_SOC_DAIFMT_AC97:
+			cf.fmt = HDMI_AC97;
+			break;
+		default:
+			dev_err(dai->dev, "Invalid DAI interface format\n");
+			return -EINVAL;
+		}
+	}
+	/* stored per DAI id; hdmi_codec_hw_params() hands it to hw_params */
+	hcp->daifmt[dai->id] = cf;
+
+	return ret;
+}
+
+/* DAI .digital_mute: call the optional digital_mute op, else return 0. */
+static int hdmi_codec_digital_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+
+	dev_dbg(dai->dev, "%s()\n", __func__);
+
+	if (!hcp->hcd.ops->digital_mute)
+		return 0;
+	return hcp->hcd.ops->digital_mute(dai->dev->parent, mute);
+}
+
+static const struct snd_soc_dai_ops hdmi_dai_ops = {
+	.startup	= hdmi_codec_startup,
+	.shutdown	= hdmi_codec_shutdown,
+	.hw_params	= hdmi_codec_hw_params,
+	.set_fmt	= hdmi_codec_set_fmt,
+	.digital_mute	= hdmi_codec_digital_mute,
+};
+
+
+#define HDMI_RATES	(SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 |\
+			 SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |\
+			 SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 |\
+			 SNDRV_PCM_RATE_192000)
+
+#define SPDIF_FORMATS	(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE |\
+			 SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S20_3BE |\
+			 SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S24_3BE |\
+			 SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_BE)
+
+/*
+ * This list is only for formats allowed on the I2S bus. So there are
+ * some formats listed that are not supported by the HDMI interface. For
+ * instance allowing the 32-bit formats enables 24-bit precision with CPU
+ * DAIs that do not support 24-bit formats. If the extra formats cause
+ * problems, we should add an option to the video side driver to disable
+ * them.
+ */
+#define I2S_FORMATS	(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE |\
+			 SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S20_3BE |\
+			 SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S24_3BE |\
+			 SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_BE |\
+			 SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_S32_BE)
+
+static struct snd_soc_dai_driver hdmi_i2s_dai = {
+	.name = "i2s-hifi",
+	.id = DAI_ID_I2S,
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 2,
+		.channels_max = 8,
+		.rates = HDMI_RATES,
+		.formats = I2S_FORMATS,
+		.sig_bits = 24,
+	},
+	.ops = &hdmi_dai_ops,
+};
+
+static const struct snd_soc_dai_driver hdmi_spdif_dai = {
+	.name = "spdif-hifi",
+	.id = DAI_ID_SPDIF,
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 2,
+		.channels_max = 2,
+		.rates = HDMI_RATES,
+		.formats = SPDIF_FORMATS,
+	},
+	.ops = &hdmi_dai_ops,
+};
+
+static struct snd_soc_codec_driver hdmi_codec = {
+	.controls = hdmi_controls,
+	.num_controls = ARRAY_SIZE(hdmi_controls),
+	.dapm_widgets = hdmi_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(hdmi_widgets),
+	.dapm_routes = hdmi_routes,
+	.num_dapm_routes = ARRAY_SIZE(hdmi_routes),
+};
+
+/*
+ * Validate pdata, create one DAI driver per enabled interface and register.
+ */
+static int hdmi_codec_probe(struct platform_device *pdev)
+{
+	struct hdmi_codec_pdata *hcd = pdev->dev.platform_data;
+	struct device *dev = &pdev->dev;
+	struct hdmi_codec_priv *hcp;
+	int dai_count, ret, i = 0;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (!hcd) {
+		dev_err(dev, "%s: No platform data\n", __func__);
+		return -EINVAL;
+	}
+
+	dai_count = hcd->i2s + hcd->spdif;
+	if (dai_count < 1 || !hcd->ops || !hcd->ops->hw_params ||
+	    !hcd->ops->audio_shutdown) {
+		dev_err(dev, "%s: Invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+
+	hcp = devm_kzalloc(dev, sizeof(*hcp), GFP_KERNEL);
+	if (!hcp)
+		return -ENOMEM;
+
+	hcp->hcd = *hcd;
+	mutex_init(&hcp->current_stream_lock);
+
+	hcp->daidrv = devm_kzalloc(dev, dai_count * sizeof(*hcp->daidrv),
+				   GFP_KERNEL);
+	if (!hcp->daidrv)
+		return -ENOMEM;
+
+	if (hcd->i2s) {
+		hcp->daidrv[i] = hdmi_i2s_dai;
+		hcp->daidrv[i].playback.channels_max = hcd->max_i2s_channels;
+		i++;
+	}
+
+	if (hcd->spdif)
+		hcp->daidrv[i] = hdmi_spdif_dai;
+
+	/* DAI ops read hcp from drvdata: set it before they can be called */
+	dev_set_drvdata(dev, hcp);
+
+	ret = snd_soc_register_codec(dev, &hdmi_codec, hcp->daidrv, dai_count);
+	if (ret)
+		dev_err(dev, "%s: snd_soc_register_codec() failed (%d)\n",
+			__func__, ret);
+
+	return ret;
+}
+
+static int hdmi_codec_remove(struct platform_device *pdev)
+{
+	snd_soc_unregister_codec(&pdev->dev);
+	return 0;
+}
+
+static struct platform_driver hdmi_codec_driver = {
+	.driver = {
+		.name = HDMI_CODEC_DRV_NAME,
+	},
+	.probe = hdmi_codec_probe,
+	.remove = hdmi_codec_remove,
+};
+
+module_platform_driver(hdmi_codec_driver);
+
+MODULE_AUTHOR("Jyri Sarha <jsarha@ti.com>");
+MODULE_DESCRIPTION("HDMI Audio Codec Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" HDMI_CODEC_DRV_NAME);
diff --git a/sound/soc/codecs/pcm5102a.c b/sound/soc/codecs/pcm5102a.c
new file mode 100644
index 0000000..ed51567
--- /dev/null
+++ b/sound/soc/codecs/pcm5102a.c
@@ -0,0 +1,69 @@
+/*
+ * Driver for the PCM5102A codec
+ *
+ * Author:	Florian Meier <florian.meier@koalo.de>
+ *		Copyright 2013
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <sound/soc.h>
+
+static struct snd_soc_dai_driver pcm5102a_dai = {
+	.name = "pcm5102a-hifi",
+	.playback = {
+		.channels_min = 2,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_8000_192000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE |
+			   SNDRV_PCM_FMTBIT_S24_LE |
+			   SNDRV_PCM_FMTBIT_S32_LE
+	},
+};
+
+static struct snd_soc_codec_driver soc_codec_dev_pcm5102a;
+
+static int pcm5102a_probe(struct platform_device *pdev)
+{
+	return snd_soc_register_codec(&pdev->dev, &soc_codec_dev_pcm5102a,
+			&pcm5102a_dai, 1);
+}
+
+static int pcm5102a_remove(struct platform_device *pdev)
+{
+	snd_soc_unregister_codec(&pdev->dev);
+	return 0;
+}
+
+static const struct of_device_id pcm5102a_of_match[] = {
+	{ .compatible = "ti,pcm5102a", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, pcm5102a_of_match);
+
+static struct platform_driver pcm5102a_codec_driver = {
+	.probe		= pcm5102a_probe,
+	.remove		= pcm5102a_remove,
+	.driver		= {
+		/* .owner is set by the platform_driver_register() macro */
+		.name	= "pcm5102a-codec",
+		.of_match_table = pcm5102a_of_match,
+	},
+};
+
+module_platform_driver(pcm5102a_codec_driver);
+
+MODULE_DESCRIPTION("ASoC PCM5102A codec driver");
+MODULE_AUTHOR("Florian Meier <florian.meier@koalo.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/rt298.c b/sound/soc/codecs/rt298.c
index f0e6c06..a1aaffc 100644
--- a/sound/soc/codecs/rt298.c
+++ b/sound/soc/codecs/rt298.c
@@ -17,6 +17,7 @@
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
+#include <linux/dmi.h>
 #include <linux/acpi.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
@@ -1132,6 +1133,17 @@
 };
 MODULE_DEVICE_TABLE(acpi, rt298_acpi_match);
 
+static const struct dmi_system_id force_combo_jack_table[] = {
+	{
+		.ident = "Intel Broxton P",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Broxton P")
+		}
+	},
+	{ }
+};
+
 static int rt298_i2c_probe(struct i2c_client *i2c,
 			   const struct i2c_device_id *id)
 {
@@ -1184,11 +1196,16 @@
 
 	/* enable jack combo mode on supported devices */
 	acpiid = acpi_match_device(dev->driver->acpi_match_table, dev);
-	if (acpiid) {
+	if (acpiid && acpiid->driver_data) {
 		rt298->pdata = *(struct rt298_platform_data *)
 				acpiid->driver_data;
 	}
 
+	if (dmi_check_system(force_combo_jack_table)) {
+		rt298->pdata.cbj_en = true;
+		rt298->pdata.gpio2_en = false;
+	}
+
 	/* VREF Charging */
 	regmap_update_bits(rt298->regmap, 0x04, 0x80, 0x80);
 	regmap_update_bits(rt298->regmap, 0x1b, 0x860, 0x860);
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 7af5e73..3c6594d 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3286,10 +3286,8 @@
 		if (btn_type == 0)/* button release */
 			report =  rt5645->jack_type;
 		else {
-			if (rt5645->pdata.jd_invert) {
-				mod_timer(&rt5645->btn_check_timer,
-					msecs_to_jiffies(100));
-			}
+			mod_timer(&rt5645->btn_check_timer,
+				msecs_to_jiffies(100));
 		}
 
 		break;
@@ -3557,6 +3555,12 @@
 			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
 		},
 	},
+	{
+		.ident = "Google Setzer",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"),
+		},
+	},
 	{ }
 };
 
@@ -3810,9 +3814,9 @@
 	if (rt5645->pdata.jd_invert) {
 		regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2,
 			RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV);
-		setup_timer(&rt5645->btn_check_timer,
-			rt5645_btn_check_callback, (unsigned long)rt5645);
 	}
+	setup_timer(&rt5645->btn_check_timer,
+		rt5645_btn_check_callback, (unsigned long)rt5645);
 
 	INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work);
 	INIT_DELAYED_WORK(&rt5645->rcclock_work, rt5645_rcclock_work);
diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c
index 1bae17e..da60e3f 100644
--- a/sound/soc/codecs/wm5102.c
+++ b/sound/soc/codecs/wm5102.c
@@ -2098,10 +2098,14 @@
 
 static int wm5102_remove(struct platform_device *pdev)
 {
+	struct wm5102_priv *wm5102 = platform_get_drvdata(pdev);
+
 	snd_soc_unregister_platform(&pdev->dev);
 	snd_soc_unregister_codec(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	wm_adsp2_remove(&wm5102->core.adsp[0]);
+
 	return 0;
 }
 
diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c
index 2728ac5..b5820e4 100644
--- a/sound/soc/codecs/wm5110.c
+++ b/sound/soc/codecs/wm5110.c
@@ -2437,10 +2437,16 @@
 
 static int wm5110_remove(struct platform_device *pdev)
 {
+	struct wm5110_priv *wm5110 = platform_get_drvdata(pdev);
+	int i;
+
 	snd_soc_unregister_platform(&pdev->dev);
 	snd_soc_unregister_codec(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	for (i = 0; i < WM5110_NUM_ADSP; i++)
+		wm_adsp2_remove(&wm5110->core.adsp[i]);
+
 	return 0;
 }
 
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index d3b1cb1..a07bd7c 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -160,6 +160,8 @@
 #define ADSP2_RAM_RDY_SHIFT                    0
 #define ADSP2_RAM_RDY_WIDTH                    1
 
+#define ADSP_MAX_STD_CTRL_SIZE               512
+
 struct wm_adsp_buf {
 	struct list_head list;
 	void *buf;
@@ -271,8 +273,11 @@
 	__be32 words_written[2];	/* total words written (64 bit) */
 };
 
+struct wm_adsp_compr;
+
 struct wm_adsp_compr_buf {
 	struct wm_adsp *dsp;
+	struct wm_adsp_compr *compr;
 
 	struct wm_adsp_buffer_region *regions;
 	u32 host_buf_ptr;
@@ -435,6 +440,7 @@
 	size_t len;
 	unsigned int set:1;
 	struct snd_kcontrol *kcontrol;
+	struct soc_bytes_ext bytes_ext;
 	unsigned int flags;
 };
 
@@ -711,10 +717,17 @@
 		 be16_to_cpu(scratch[3]));
 }
 
+static inline struct wm_coeff_ctl *bytes_ext_to_ctl(struct soc_bytes_ext *ext)
+{
+	return container_of(ext, struct wm_coeff_ctl, bytes_ext);
+}
+
 static int wm_coeff_info(struct snd_kcontrol *kctl,
 			 struct snd_ctl_elem_info *uinfo)
 {
-	struct wm_coeff_ctl *ctl = (struct wm_coeff_ctl *)kctl->private_value;
+	struct soc_bytes_ext *bytes_ext =
+		(struct soc_bytes_ext *)kctl->private_value;
+	struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
 
 	uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES;
 	uinfo->count = ctl->len;
@@ -763,7 +776,9 @@
 static int wm_coeff_put(struct snd_kcontrol *kctl,
 			struct snd_ctl_elem_value *ucontrol)
 {
-	struct wm_coeff_ctl *ctl = (struct wm_coeff_ctl *)kctl->private_value;
+	struct soc_bytes_ext *bytes_ext =
+		(struct soc_bytes_ext *)kctl->private_value;
+	struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
 	char *p = ucontrol->value.bytes.data;
 	int ret = 0;
 
@@ -780,6 +795,29 @@
 	return ret;
 }
 
+/* TLV .put: cache @size bytes from user space, write out when enabled. */
+static int wm_coeff_tlv_put(struct snd_kcontrol *kctl,
+			    const unsigned int __user *bytes, unsigned int size)
+{
+	struct soc_bytes_ext *bytes_ext =
+		(struct soc_bytes_ext *)kctl->private_value;
+	struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
+	int ret = 0;
+
+	mutex_lock(&ctl->dsp->pwr_lock);
+	if (copy_from_user(ctl->cache, bytes, size)) {
+		ret = -EFAULT;
+		goto unlock;
+	}
+	ctl->set = 1;
+	if (ctl->enabled)
+		ret = wm_coeff_write_control(ctl, ctl->cache, size);
+unlock:
+	mutex_unlock(&ctl->dsp->pwr_lock);
+
+	return ret;
+}
+
 static int wm_coeff_read_control(struct wm_coeff_ctl *ctl,
 				 void *buf, size_t len)
 {
@@ -822,7 +860,9 @@
 static int wm_coeff_get(struct snd_kcontrol *kctl,
 			struct snd_ctl_elem_value *ucontrol)
 {
-	struct wm_coeff_ctl *ctl = (struct wm_coeff_ctl *)kctl->private_value;
+	struct soc_bytes_ext *bytes_ext =
+		(struct soc_bytes_ext *)kctl->private_value;
+	struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
 	char *p = ucontrol->value.bytes.data;
 	int ret = 0;
 
@@ -845,12 +885,72 @@
 	return ret;
 }
 
+/* TLV .get: refresh the cache if the flags ask for it, then copy it out. */
+static int wm_coeff_tlv_get(struct snd_kcontrol *kctl,
+			    unsigned int __user *bytes, unsigned int size)
+{
+	struct soc_bytes_ext *bytes_ext =
+		(struct soc_bytes_ext *)kctl->private_value;
+	struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
+	int ret = 0;
+
+	mutex_lock(&ctl->dsp->pwr_lock);
+
+	if (ctl->enabled) {
+		/* volatile and flag-less controls are re-read when enabled */
+		if ((ctl->flags & WMFW_CTL_FLAG_VOLATILE) || !ctl->flags)
+			ret = wm_coeff_read_control(ctl, ctl->cache, size);
+	} else if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) {
+		/* a disabled volatile control is refused, not served stale */
+		ret = -EPERM;
+	}
+
+	if (!ret && copy_to_user(bytes, ctl->cache, size))
+		ret = -EFAULT;
+
+	mutex_unlock(&ctl->dsp->pwr_lock);
+
+	return ret;
+}
+
 struct wmfw_ctl_work {
 	struct wm_adsp *dsp;
 	struct wm_coeff_ctl *ctl;
 	struct work_struct work;
 };
 
+/*
+ * Map firmware control flags (@in) to ALSA control access bits. A control
+ * longer than ADSP_MAX_STD_CTRL_SIZE gets the TLV read/write bits and the
+ * TLV callback bit instead of the plain read/write bits. @in == 0 yields
+ * read, write and volatile access.
+ */
+static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len)
+{
+	const bool tlv = len > ADSP_MAX_STD_CTRL_SIZE;
+	/* pick the flavour of read/write access once, up front */
+	const unsigned int rd = tlv ? SNDRV_CTL_ELEM_ACCESS_TLV_READ :
+				      SNDRV_CTL_ELEM_ACCESS_READ;
+	const unsigned int wr = tlv ? SNDRV_CTL_ELEM_ACCESS_TLV_WRITE :
+				      SNDRV_CTL_ELEM_ACCESS_WRITE;
+	/* the volatile bit is the same for both flavours */
+	const unsigned int vol = SNDRV_CTL_ELEM_ACCESS_VOLATILE;
+	unsigned int out = tlv ? SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK : 0;
+
+	if (!in)
+		return out | rd | wr | vol;
+
+	if (in & WMFW_CTL_FLAG_READABLE)
+		out |= rd;
+	if (in & WMFW_CTL_FLAG_WRITEABLE)
+		out |= wr;
+	if (in & WMFW_CTL_FLAG_VOLATILE)
+		out |= vol;
+
+	return out;
+}
+
 static int wmfw_add_ctl(struct wm_adsp *dsp, struct wm_coeff_ctl *ctl)
 {
 	struct snd_kcontrol_new *kcontrol;
@@ -868,19 +968,15 @@
 	kcontrol->info = wm_coeff_info;
 	kcontrol->get = wm_coeff_get;
 	kcontrol->put = wm_coeff_put;
-	kcontrol->private_value = (unsigned long)ctl;
+	kcontrol->iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+	kcontrol->tlv.c = snd_soc_bytes_tlv_callback;
+	kcontrol->private_value = (unsigned long)&ctl->bytes_ext;
 
-	if (ctl->flags) {
-		if (ctl->flags & WMFW_CTL_FLAG_WRITEABLE)
-			kcontrol->access |= SNDRV_CTL_ELEM_ACCESS_WRITE;
-		if (ctl->flags & WMFW_CTL_FLAG_READABLE)
-			kcontrol->access |= SNDRV_CTL_ELEM_ACCESS_READ;
-		if (ctl->flags & WMFW_CTL_FLAG_VOLATILE)
-			kcontrol->access |= SNDRV_CTL_ELEM_ACCESS_VOLATILE;
-	} else {
-		kcontrol->access = SNDRV_CTL_ELEM_ACCESS_READWRITE;
-		kcontrol->access |= SNDRV_CTL_ELEM_ACCESS_VOLATILE;
-	}
+	ctl->bytes_ext.max = ctl->len;
+	ctl->bytes_ext.get = wm_coeff_tlv_get;
+	ctl->bytes_ext.put = wm_coeff_tlv_put;
+
+	kcontrol->access = wmfw_convert_flags(ctl->flags, ctl->len);
 
 	ret = snd_soc_add_card_controls(dsp->card, kcontrol, 1);
 	if (ret < 0)
@@ -944,6 +1040,13 @@
 	kfree(ctl_work);
 }
 
+static void wm_adsp_free_ctl_blk(struct wm_coeff_ctl *ctl)
+{
+	kfree(ctl->cache);
+	kfree(ctl->name);
+	kfree(ctl);
+}
+
 static int wm_adsp_create_control(struct wm_adsp *dsp,
 				  const struct wm_adsp_alg_region *alg_region,
 				  unsigned int offset, unsigned int len,
@@ -1032,11 +1135,6 @@
 
 	ctl->flags = flags;
 	ctl->offset = offset;
-	if (len > 512) {
-		adsp_warn(dsp, "Truncating control %s from %d\n",
-			  ctl->name, len);
-		len = 512;
-	}
 	ctl->len = len;
 	ctl->cache = kzalloc(ctl->len, GFP_KERNEL);
 	if (!ctl->cache) {
@@ -1564,6 +1662,19 @@
 	return alg_region;
 }
 
+/*
+ * Unlink and free every entry of dsp->alg_regions, leaving the list empty.
+ */
+static void wm_adsp_free_alg_regions(struct wm_adsp *dsp)
+{
+	struct wm_adsp_alg_region *region, *next;
+
+	list_for_each_entry_safe(region, next, &dsp->alg_regions, list) {
+		list_del(&region->list);
+		kfree(region);
+	}
+}
+
 static int wm_adsp1_setup_algs(struct wm_adsp *dsp)
 {
 	struct wmfw_adsp1_id_hdr adsp1_id;
@@ -1994,7 +2105,6 @@
 	struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm);
 	struct wm_adsp *dsps = snd_soc_codec_get_drvdata(codec);
 	struct wm_adsp *dsp = &dsps[w->shift];
-	struct wm_adsp_alg_region *alg_region;
 	struct wm_coeff_ctl *ctl;
 	int ret;
 	unsigned int val;
@@ -2074,13 +2184,8 @@
 		list_for_each_entry(ctl, &dsp->ctl_list, list)
 			ctl->enabled = 0;
 
-		while (!list_empty(&dsp->alg_regions)) {
-			alg_region = list_first_entry(&dsp->alg_regions,
-						      struct wm_adsp_alg_region,
-						      list);
-			list_del(&alg_region->list);
-			kfree(alg_region);
-		}
+
+		wm_adsp_free_alg_regions(dsp);
 		break;
 
 	default:
@@ -2222,7 +2327,6 @@
 	struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm);
 	struct wm_adsp *dsps = snd_soc_codec_get_drvdata(codec);
 	struct wm_adsp *dsp = &dsps[w->shift];
-	struct wm_adsp_alg_region *alg_region;
 	struct wm_coeff_ctl *ctl;
 	int ret;
 
@@ -2240,9 +2344,13 @@
 		if (ret != 0)
 			goto err;
 
+		mutex_lock(&dsp->pwr_lock);
+
 		if (wm_adsp_fw[dsp->fw].num_caps != 0)
 			ret = wm_adsp_buffer_init(dsp);
 
+		mutex_unlock(&dsp->pwr_lock);
+
 		break;
 
 	case SND_SOC_DAPM_PRE_PMD:
@@ -2269,13 +2377,7 @@
 		list_for_each_entry(ctl, &dsp->ctl_list, list)
 			ctl->enabled = 0;
 
-		while (!list_empty(&dsp->alg_regions)) {
-			alg_region = list_first_entry(&dsp->alg_regions,
-						      struct wm_adsp_alg_region,
-						      list);
-			list_del(&alg_region->list);
-			kfree(alg_region);
-		}
+		wm_adsp_free_alg_regions(dsp);
 
 		if (wm_adsp_fw[dsp->fw].num_caps != 0)
 			wm_adsp_buffer_free(dsp);
@@ -2340,6 +2442,54 @@
 }
 EXPORT_SYMBOL_GPL(wm_adsp2_init);
 
+/* Unlink every control on dsp->ctl_list and free it. */
+void wm_adsp2_remove(struct wm_adsp *dsp)
+{
+	struct wm_coeff_ctl *ctl, *next;
+
+	/* _safe walk: each entry is freed while the list is traversed */
+	list_for_each_entry_safe(ctl, next, &dsp->ctl_list, list) {
+		list_del(&ctl->list);
+		wm_adsp_free_ctl_blk(ctl);
+	}
+}
+EXPORT_SYMBOL_GPL(wm_adsp2_remove);
+
+static inline int wm_adsp_compr_attached(struct wm_adsp_compr *compr)
+{
+	return compr->buf != NULL;
+}
+
+static int wm_adsp_compr_attach(struct wm_adsp_compr *compr)
+{
+	/*
+	 * Note this will be more complex once each DSP can support multiple
+	 * streams
+	 */
+	if (!compr->dsp->buffer)
+		return -EINVAL;
+
+	compr->buf = compr->dsp->buffer;
+	compr->buf->compr = compr;
+
+	return 0;
+}
+
+static void wm_adsp_compr_detach(struct wm_adsp_compr *compr)
+{
+	if (!compr)
+		return;
+
+	/* Wake the poll so it can see buffer is no longer attached */
+	if (compr->stream)
+		snd_compr_fragment_elapsed(compr->stream);
+
+	if (wm_adsp_compr_attached(compr)) {
+		compr->buf->compr = NULL;
+		compr->buf = NULL;
+	}
+}
+
 int wm_adsp_compr_open(struct wm_adsp *dsp, struct snd_compr_stream *stream)
 {
 	struct wm_adsp_compr *compr;
@@ -2393,6 +2543,7 @@
 
 	mutex_lock(&dsp->pwr_lock);
 
+	wm_adsp_compr_detach(compr);
 	dsp->compr = NULL;
 
 	kfree(compr->raw_buf);
@@ -2689,6 +2840,8 @@
 static int wm_adsp_buffer_free(struct wm_adsp *dsp)
 {
 	if (dsp->buffer) {
+		wm_adsp_compr_detach(dsp->buffer->compr);
+
 		kfree(dsp->buffer->regions);
 		kfree(dsp->buffer);
 
@@ -2698,25 +2851,6 @@
 	return 0;
 }
 
-static inline int wm_adsp_compr_attached(struct wm_adsp_compr *compr)
-{
-	return compr->buf != NULL;
-}
-
-static int wm_adsp_compr_attach(struct wm_adsp_compr *compr)
-{
-	/*
-	 * Note this will be more complex once each DSP can support multiple
-	 * streams
-	 */
-	if (!compr->dsp->buffer)
-		return -EINVAL;
-
-	compr->buf = compr->dsp->buffer;
-
-	return 0;
-}
-
 int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd)
 {
 	struct wm_adsp_compr *compr = stream->runtime->private_data;
@@ -2805,21 +2939,41 @@
 		avail += wm_adsp_buffer_size(buf);
 
 	adsp_dbg(buf->dsp, "readindex=0x%x, writeindex=0x%x, avail=%d\n",
-		 buf->read_index, write_index, avail);
+		 buf->read_index, write_index, avail * WM_ADSP_DATA_WORD_SIZE);
 
 	buf->avail = avail;
 
 	return 0;
 }
 
+static int wm_adsp_buffer_get_error(struct wm_adsp_compr_buf *buf)
+{
+	int ret;
+
+	ret = wm_adsp_buffer_read(buf, HOST_BUFFER_FIELD(error), &buf->error);
+	if (ret < 0) {
+		adsp_err(buf->dsp, "Failed to check buffer error: %d\n", ret);
+		return ret;
+	}
+	if (buf->error != 0) {
+		adsp_err(buf->dsp, "Buffer error occurred: %d\n", buf->error);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 int wm_adsp_compr_handle_irq(struct wm_adsp *dsp)
 {
-	struct wm_adsp_compr_buf *buf = dsp->buffer;
-	struct wm_adsp_compr *compr = dsp->compr;
+	struct wm_adsp_compr_buf *buf;
+	struct wm_adsp_compr *compr;
 	int ret = 0;
 
 	mutex_lock(&dsp->pwr_lock);
 
+	buf = dsp->buffer;
+	compr = dsp->compr;
+
 	if (!buf) {
 		ret = -ENODEV;
 		goto out;
@@ -2827,16 +2981,9 @@
 
 	adsp_dbg(dsp, "Handling buffer IRQ\n");
 
-	ret = wm_adsp_buffer_read(buf, HOST_BUFFER_FIELD(error), &buf->error);
-	if (ret < 0) {
-		adsp_err(dsp, "Failed to check buffer error: %d\n", ret);
-		goto out;
-	}
-	if (buf->error != 0) {
-		adsp_err(dsp, "Buffer error occurred: %d\n", buf->error);
-		ret = -EIO;
-		goto out;
-	}
+	ret = wm_adsp_buffer_get_error(buf);
+	if (ret < 0)
+		goto out_notify; /* Wake poll to report error */
 
 	ret = wm_adsp_buffer_read(buf, HOST_BUFFER_FIELD(irq_count),
 				  &buf->irq_count);
@@ -2851,6 +2998,7 @@
 		goto out;
 	}
 
+out_notify:
 	if (compr && compr->stream)
 		snd_compr_fragment_elapsed(compr->stream);
 
@@ -2879,14 +3027,16 @@
 			  struct snd_compr_tstamp *tstamp)
 {
 	struct wm_adsp_compr *compr = stream->runtime->private_data;
-	struct wm_adsp_compr_buf *buf = compr->buf;
 	struct wm_adsp *dsp = compr->dsp;
+	struct wm_adsp_compr_buf *buf;
 	int ret = 0;
 
 	adsp_dbg(dsp, "Pointer request\n");
 
 	mutex_lock(&dsp->pwr_lock);
 
+	buf = compr->buf;
+
 	if (!compr->buf) {
 		ret = -ENXIO;
 		goto out;
@@ -2909,6 +3059,10 @@
 		 * DSP to inform us once a whole fragment is available.
 		 */
 		if (buf->avail < wm_adsp_compr_frag_words(compr)) {
+			ret = wm_adsp_buffer_get_error(buf);
+			if (ret < 0)
+				goto out;
+
 			ret = wm_adsp_buffer_reenable_irq(buf);
 			if (ret < 0) {
 				adsp_err(dsp,
diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h
index b61cb57..feb61e2 100644
--- a/sound/soc/codecs/wm_adsp.h
+++ b/sound/soc/codecs/wm_adsp.h
@@ -92,6 +92,7 @@
 
 int wm_adsp1_init(struct wm_adsp *dsp);
 int wm_adsp2_init(struct wm_adsp *dsp);
+void wm_adsp2_remove(struct wm_adsp *dsp);
 int wm_adsp2_codec_probe(struct wm_adsp *dsp, struct snd_soc_codec *codec);
 int wm_adsp2_codec_remove(struct wm_adsp *dsp, struct snd_soc_codec *codec);
 int wm_adsp1_event(struct snd_soc_dapm_widget *w,
diff --git a/sound/soc/davinci/Kconfig b/sound/soc/davinci/Kconfig
index 50ca291..6b732d8 100644
--- a/sound/soc/davinci/Kconfig
+++ b/sound/soc/davinci/Kconfig
@@ -16,7 +16,11 @@
 	  - DRA7xx family
 
 config SND_DAVINCI_SOC_I2S
-	tristate
+	tristate "DaVinci Multichannel Buffered Serial Port (McBSP) support"
+	depends on SND_EDMA_SOC
+	help
+	  Say Y or M here if you want to have support for McBSP IP found in
+	  Texas Instruments DaVinci DA850 SoCs.
 
 config SND_DAVINCI_SOC_MCASP
 	tristate "Multichannel Audio Serial Port (McASP) support"
diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c
index ec98548..3849616 100644
--- a/sound/soc/davinci/davinci-i2s.c
+++ b/sound/soc/davinci/davinci-i2s.c
@@ -4,9 +4,15 @@
  * Author:      Vladimir Barinov, <vbarinov@embeddedalley.com>
  * Copyright:   (C) 2007 MontaVista Software, Inc., <source@mvista.com>
  *
+ * DT support	(c) 2016 Petr Kulhavy, Barix AG <petr@barix.com>
+ *		based on davinci-mcasp.c DT support
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * TODO:
+ * on DA850 implement HW FIFOs instead of DMA into DXR and DRR registers
  */
 
 #include <linux/init.h>
@@ -650,13 +656,24 @@
 
 static int davinci_i2s_probe(struct platform_device *pdev)
 {
+	struct snd_dmaengine_dai_dma_data *dma_data;
 	struct davinci_mcbsp_dev *dev;
 	struct resource *mem, *res;
 	void __iomem *io_base;
 	int *dma;
 	int ret;
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mpu");
+	if (!mem) {
+		dev_warn(&pdev->dev,
+			 "\"mpu\" mem resource not found, using index 0\n");
+		mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!mem) {
+			dev_err(&pdev->dev, "no mem resource?\n");
+			return -ENODEV;
+		}
+	}
+
 	io_base = devm_ioremap_resource(&pdev->dev, mem);
 	if (IS_ERR(io_base))
 		return PTR_ERR(io_base);
@@ -666,40 +683,44 @@
 	if (!dev)
 		return -ENOMEM;
 
+	dev->base = io_base;
+
+	/* setup DMA, first TX, then RX */
+	dma_data = &dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK];
+	dma_data->addr = (dma_addr_t)(mem->start + DAVINCI_MCBSP_DXR_REG);
+
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (res) {
+		dma = &dev->dma_request[SNDRV_PCM_STREAM_PLAYBACK];
+		*dma = res->start;
+		dma_data->filter_data = dma;
+	} else if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) {
+		dma_data->filter_data = "tx";
+	} else {
+		dev_err(&pdev->dev, "Missing DMA tx resource\n");
+		return -ENODEV;
+	}
+
+	dma_data = &dev->dma_data[SNDRV_PCM_STREAM_CAPTURE];
+	dma_data->addr = (dma_addr_t)(mem->start + DAVINCI_MCBSP_DRR_REG);
+
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (res) {
+		dma = &dev->dma_request[SNDRV_PCM_STREAM_CAPTURE];
+		*dma = res->start;
+		dma_data->filter_data = dma;
+	} else if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) {
+		dma_data->filter_data = "rx";
+	} else {
+		dev_err(&pdev->dev, "Missing DMA rx resource\n");
+		return -ENODEV;
+	}
+
 	dev->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dev->clk))
 		return -ENODEV;
 	clk_enable(dev->clk);
 
-	dev->base = io_base;
-
-	dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].addr =
-	    (dma_addr_t)(mem->start + DAVINCI_MCBSP_DXR_REG);
-
-	dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].addr =
-	    (dma_addr_t)(mem->start + DAVINCI_MCBSP_DRR_REG);
-
-	/* first TX, then RX */
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no DMA resource\n");
-		ret = -ENXIO;
-		goto err_release_clk;
-	}
-	dma = &dev->dma_request[SNDRV_PCM_STREAM_PLAYBACK];
-	*dma = res->start;
-	dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].filter_data = dma;
-
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-	if (!res) {
-		dev_err(&pdev->dev, "no DMA resource\n");
-		ret = -ENXIO;
-		goto err_release_clk;
-	}
-	dma = &dev->dma_request[SNDRV_PCM_STREAM_CAPTURE];
-	*dma = res->start;
-	dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].filter_data = dma;
-
 	dev->dev = &pdev->dev;
 	dev_set_drvdata(&pdev->dev, dev);
 
@@ -737,11 +758,18 @@
 	return 0;
 }
 
+static const struct of_device_id davinci_i2s_match[] = {
+	{ .compatible = "ti,da850-mcbsp" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, davinci_i2s_match);
+
 static struct platform_driver davinci_mcbsp_driver = {
 	.probe		= davinci_i2s_probe,
 	.remove		= davinci_i2s_remove,
 	.driver		= {
 		.name	= "davinci-mcbsp",
+		.of_match_table = of_match_ptr(davinci_i2s_match),
 	},
 };
 
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index e132498..0f66fda 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -489,7 +489,7 @@
 		mcasp_clr_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG, AFSRE);
 
 		mcasp_clr_bits(mcasp, DAVINCI_MCASP_PDIR_REG,
-			       ACLKX | AHCLKX | AFSX | ACLKR | AHCLKR | AFSR);
+			       ACLKX | AFSX | ACLKR | AHCLKR | AFSR);
 		mcasp->bclk_master = 0;
 		break;
 	default:
@@ -540,21 +540,19 @@
 	return ret;
 }
 
-static int __davinci_mcasp_set_clkdiv(struct snd_soc_dai *dai, int div_id,
+static int __davinci_mcasp_set_clkdiv(struct davinci_mcasp *mcasp, int div_id,
 				      int div, bool explicit)
 {
-	struct davinci_mcasp *mcasp = snd_soc_dai_get_drvdata(dai);
-
 	pm_runtime_get_sync(mcasp->dev);
 	switch (div_id) {
-	case 0:		/* MCLK divider */
+	case MCASP_CLKDIV_AUXCLK:			/* MCLK divider */
 		mcasp_mod_bits(mcasp, DAVINCI_MCASP_AHCLKXCTL_REG,
 			       AHCLKXDIV(div - 1), AHCLKXDIV_MASK);
 		mcasp_mod_bits(mcasp, DAVINCI_MCASP_AHCLKRCTL_REG,
 			       AHCLKRDIV(div - 1), AHCLKRDIV_MASK);
 		break;
 
-	case 1:		/* BCLK divider */
+	case MCASP_CLKDIV_BCLK:			/* BCLK divider */
 		mcasp_mod_bits(mcasp, DAVINCI_MCASP_ACLKXCTL_REG,
 			       ACLKXDIV(div - 1), ACLKXDIV_MASK);
 		mcasp_mod_bits(mcasp, DAVINCI_MCASP_ACLKRCTL_REG,
@@ -563,7 +561,8 @@
 			mcasp->bclk_div = div;
 		break;
 
-	case 2:	/*
+	case MCASP_CLKDIV_BCLK_FS_RATIO:
+		/*
 		 * BCLK/LRCLK ratio descries how many bit-clock cycles
 		 * fit into one frame. The clock ratio is given for a
 		 * full period of data (for I2S format both left and
@@ -591,7 +590,9 @@
 static int davinci_mcasp_set_clkdiv(struct snd_soc_dai *dai, int div_id,
 				    int div)
 {
-	return __davinci_mcasp_set_clkdiv(dai, div_id, div, 1);
+	struct davinci_mcasp *mcasp = snd_soc_dai_get_drvdata(dai);
+
+	return __davinci_mcasp_set_clkdiv(mcasp, div_id, div, 1);
 }
 
 static int davinci_mcasp_set_sysclk(struct snd_soc_dai *dai, int clk_id,
@@ -999,27 +1000,53 @@
 }
 
 static int davinci_mcasp_calc_clk_div(struct davinci_mcasp *mcasp,
-				      unsigned int bclk_freq,
-				      int *error_ppm)
+				      unsigned int bclk_freq, bool set)
 {
-	int div = mcasp->sysclk_freq / bclk_freq;
-	int rem = mcasp->sysclk_freq % bclk_freq;
+	int error_ppm;
+	unsigned int sysclk_freq = mcasp->sysclk_freq;
+	u32 reg = mcasp_get_reg(mcasp, DAVINCI_MCASP_AHCLKXCTL_REG);
+	int div = sysclk_freq / bclk_freq;
+	int rem = sysclk_freq % bclk_freq;
+	int aux_div = 1;
+
+	if (div > (ACLKXDIV_MASK + 1)) {
+		if (reg & AHCLKXE) {
+			aux_div = div / (ACLKXDIV_MASK + 1);
+			if (div % (ACLKXDIV_MASK + 1))
+				aux_div++;
+
+			sysclk_freq /= aux_div;
+			div = sysclk_freq / bclk_freq;
+			rem = sysclk_freq % bclk_freq;
+		} else if (set) {
+			dev_warn(mcasp->dev, "Too fast reference clock (%u)\n",
+				 sysclk_freq);
+		}
+	}
 
 	if (rem != 0) {
 		if (div == 0 ||
-		    ((mcasp->sysclk_freq / div) - bclk_freq) >
-		    (bclk_freq - (mcasp->sysclk_freq / (div+1)))) {
+		    ((sysclk_freq / div) - bclk_freq) >
+		    (bclk_freq - (sysclk_freq / (div+1)))) {
 			div++;
 			rem = rem - bclk_freq;
 		}
 	}
-	if (error_ppm)
-		*error_ppm =
-			(div*1000000 + (int)div64_long(1000000LL*rem,
-						       (int)bclk_freq))
-			/div - 1000000;
+	error_ppm = (div*1000000 + (int)div64_long(1000000LL*rem,
+		     (int)bclk_freq)) / div - 1000000;
 
-	return div;
+	if (set) {
+		if (error_ppm)
+			dev_info(mcasp->dev, "Sample-rate is off by %d PPM\n",
+				 error_ppm);
+
+		__davinci_mcasp_set_clkdiv(mcasp, MCASP_CLKDIV_BCLK, div, 0);
+		if (reg & AHCLKXE)
+			__davinci_mcasp_set_clkdiv(mcasp, MCASP_CLKDIV_AUXCLK,
+						   aux_div, 0);
+	}
+
+	return error_ppm;
 }
 
 static int davinci_mcasp_hw_params(struct snd_pcm_substream *substream,
@@ -1044,18 +1071,11 @@
 		int slots = mcasp->tdm_slots;
 		int rate = params_rate(params);
 		int sbits = params_width(params);
-		int ppm, div;
 
 		if (mcasp->slot_width)
 			sbits = mcasp->slot_width;
 
-		div = davinci_mcasp_calc_clk_div(mcasp, rate*sbits*slots,
-						 &ppm);
-		if (ppm)
-			dev_info(mcasp->dev, "Sample-rate is off by %d PPM\n",
-				 ppm);
-
-		__davinci_mcasp_set_clkdiv(cpu_dai, 1, div, 0);
+		davinci_mcasp_calc_clk_div(mcasp, rate * sbits * slots, true);
 	}
 
 	ret = mcasp_common_hw_param(mcasp, substream->stream,
@@ -1166,7 +1186,8 @@
 				davinci_mcasp_dai_rates[i];
 			int ppm;
 
-			davinci_mcasp_calc_clk_div(rd->mcasp, bclk_freq, &ppm);
+			ppm = davinci_mcasp_calc_clk_div(rd->mcasp, bclk_freq,
+							 false);
 			if (abs(ppm) < DAVINCI_MAX_RATE_ERROR_PPM) {
 				if (range.empty) {
 					range.min = davinci_mcasp_dai_rates[i];
@@ -1205,8 +1226,9 @@
 			if (rd->mcasp->slot_width)
 				sbits = rd->mcasp->slot_width;
 
-			davinci_mcasp_calc_clk_div(rd->mcasp, sbits*slots*rate,
-						   &ppm);
+			ppm = davinci_mcasp_calc_clk_div(rd->mcasp,
+							 sbits * slots * rate,
+							 false);
 			if (abs(ppm) < DAVINCI_MAX_RATE_ERROR_PPM) {
 				snd_mask_set(&nfmt, i);
 				count++;
@@ -1230,11 +1252,15 @@
 	int i, dir;
 	int tdm_slots = mcasp->tdm_slots;
 
-	if (mcasp->tdm_mask[substream->stream])
-		tdm_slots = hweight32(mcasp->tdm_mask[substream->stream]);
+	/* Do not allow more than one stream per direction */
+	if (mcasp->substreams[substream->stream])
+		return -EBUSY;
 
 	mcasp->substreams[substream->stream] = substream;
 
+	if (mcasp->tdm_mask[substream->stream])
+		tdm_slots = hweight32(mcasp->tdm_mask[substream->stream]);
+
 	if (mcasp->op_mode == DAVINCI_MCASP_DIT_MODE)
 		return 0;
 
diff --git a/sound/soc/davinci/davinci-mcasp.h b/sound/soc/davinci/davinci-mcasp.h
index a3be108..1e8787f 100644
--- a/sound/soc/davinci/davinci-mcasp.h
+++ b/sound/soc/davinci/davinci-mcasp.h
@@ -306,4 +306,9 @@
 #define NUMEVT(x)	(((x) & 0xFF) << 8)
 #define NUMDMA_MASK	(0xFF)
 
+/* clock divider IDs */
+#define MCASP_CLKDIV_AUXCLK		0 /* HCLK divider from AUXCLK */
+#define MCASP_CLKDIV_BCLK		1 /* BCLK divider from HCLK */
+#define MCASP_CLKDIV_BCLK_FS_RATIO	2 /* to set BCLK FS ratio */
+
 #endif	/* DAVINCI_MCASP_H */
diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c
index bff258d..0db69b7 100644
--- a/sound/soc/dwc/designware_i2s.c
+++ b/sound/soc/dwc/designware_i2s.c
@@ -100,6 +100,7 @@
 	struct device *dev;
 	u32 ccr;
 	u32 xfer_resolution;
+	u32 fifo_th;
 
 	/* data related to DMA transfers b/w i2s and DMAC */
 	union dw_i2s_snd_dma_data play_dma_data;
@@ -147,17 +148,18 @@
 static void i2s_start(struct dw_i2s_dev *dev,
 		      struct snd_pcm_substream *substream)
 {
+	struct i2s_clk_config_data *config = &dev->config;
 	u32 i, irq;
 	i2s_write_reg(dev->i2s_base, IER, 1);
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		for (i = 0; i < 4; i++) {
+		for (i = 0; i < (config->chan_nr / 2); i++) {
 			irq = i2s_read_reg(dev->i2s_base, IMR(i));
 			i2s_write_reg(dev->i2s_base, IMR(i), irq & ~0x30);
 		}
 		i2s_write_reg(dev->i2s_base, ITER, 1);
 	} else {
-		for (i = 0; i < 4; i++) {
+		for (i = 0; i < (config->chan_nr / 2); i++) {
 			irq = i2s_read_reg(dev->i2s_base, IMR(i));
 			i2s_write_reg(dev->i2s_base, IMR(i), irq & ~0x03);
 		}
@@ -231,14 +233,16 @@
 		if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
 			i2s_write_reg(dev->i2s_base, TCR(ch_reg),
 				      dev->xfer_resolution);
-			i2s_write_reg(dev->i2s_base, TFCR(ch_reg), 0x02);
+			i2s_write_reg(dev->i2s_base, TFCR(ch_reg),
+				      dev->fifo_th - 1);
 			irq = i2s_read_reg(dev->i2s_base, IMR(ch_reg));
 			i2s_write_reg(dev->i2s_base, IMR(ch_reg), irq & ~0x30);
 			i2s_write_reg(dev->i2s_base, TER(ch_reg), 1);
 		} else {
 			i2s_write_reg(dev->i2s_base, RCR(ch_reg),
 				      dev->xfer_resolution);
-			i2s_write_reg(dev->i2s_base, RFCR(ch_reg), 0x07);
+			i2s_write_reg(dev->i2s_base, RFCR(ch_reg),
+				      dev->fifo_th - 1);
 			irq = i2s_read_reg(dev->i2s_base, IMR(ch_reg));
 			i2s_write_reg(dev->i2s_base, IMR(ch_reg), irq & ~0x03);
 			i2s_write_reg(dev->i2s_base, RER(ch_reg), 1);
@@ -498,6 +502,7 @@
 	 */
 	u32 comp1 = i2s_read_reg(dev->i2s_base, dev->i2s_reg_comp1);
 	u32 comp2 = i2s_read_reg(dev->i2s_base, dev->i2s_reg_comp2);
+	u32 fifo_depth = 1 << (1 + COMP1_FIFO_DEPTH_GLOBAL(comp1));
 	u32 idx;
 
 	if (dev->capability & DWC_I2S_RECORD &&
@@ -536,6 +541,7 @@
 		dev->capability |= DW_I2S_SLAVE;
 	}
 
+	dev->fifo_th = fifo_depth / 2;
 	return 0;
 }
 
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 0754df7..2147994 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -21,6 +21,8 @@
 #include <sound/core.h>
 #include <sound/dmaengine_pcm.h>
 #include <sound/pcm_params.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
 
 #include "fsl_sai.h"
 #include "imx-pcm.h"
@@ -786,10 +788,12 @@
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct fsl_sai *sai;
+	struct regmap *gpr;
 	struct resource *res;
 	void __iomem *base;
 	char tmp[8];
 	int irq, ret, i;
+	int index;
 
 	sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL);
 	if (!sai)
@@ -797,7 +801,8 @@
 
 	sai->pdev = pdev;
 
-	if (of_device_is_compatible(pdev->dev.of_node, "fsl,imx6sx-sai"))
+	if (of_device_is_compatible(pdev->dev.of_node, "fsl,imx6sx-sai") ||
+	    of_device_is_compatible(pdev->dev.of_node, "fsl,imx6ul-sai"))
 		sai->sai_on_imx = true;
 
 	sai->is_lsb_first = of_property_read_bool(np, "lsb-first");
@@ -877,6 +882,22 @@
 		fsl_sai_dai.symmetric_samplebits = 0;
 	}
 
+	if (of_find_property(np, "fsl,sai-mclk-direction-output", NULL) &&
+	    of_device_is_compatible(pdev->dev.of_node, "fsl,imx6ul-sai")) {
+		gpr = syscon_regmap_lookup_by_compatible("fsl,imx6ul-iomuxc-gpr");
+		if (IS_ERR(gpr)) {
+			dev_err(&pdev->dev, "cannot find iomuxc registers\n");
+			return PTR_ERR(gpr);
+		}
+
+		index = of_alias_get_id(np, "sai");
+		if (index < 0)
+			return index;
+
+		regmap_update_bits(gpr, IOMUXC_GPR1, MCLK_DIR(index),
+				   MCLK_DIR(index));
+	}
+
 	sai->dma_params_rx.addr = res->start + FSL_SAI_RDR;
 	sai->dma_params_tx.addr = res->start + FSL_SAI_TDR;
 	sai->dma_params_rx.maxburst = FSL_SAI_MAXBURST_RX;
@@ -898,6 +919,7 @@
 static const struct of_device_id fsl_sai_ids[] = {
 	{ .compatible = "fsl,vf610-sai", },
 	{ .compatible = "fsl,imx6sx-sai", },
+	{ .compatible = "fsl,imx6ul-sai", },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, fsl_sai_ids);
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index ed8de10..632ecc0 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -137,6 +137,7 @@
 	case CCSR_SSI_SACDAT:
 	case CCSR_SSI_SATAG:
 	case CCSR_SSI_SACCST:
+	case CCSR_SSI_SOR:
 		return true;
 	default:
 		return false;
@@ -261,6 +262,7 @@
 	struct fsl_ssi_dbg dbg_stats;
 
 	const struct fsl_ssi_soc_data *soc;
+	struct device *dev;
 };
 
 /*
@@ -400,6 +402,26 @@
 }
 
 /*
+ * Clear RX or TX FIFO to remove samples from the previous
+ * stream session which may be still present in the FIFO and
+ * may introduce bad samples and/or channel slipping.
+ *
+ * Note: The SOR is not documented in recent IMX datasheet, but
+ * is described in IMX51 reference manual at section 56.3.3.15.
+ */
+static void fsl_ssi_fifo_clear(struct fsl_ssi_private *ssi_private,
+		bool is_rx)
+{
+	if (is_rx) {
+		regmap_update_bits(ssi_private->regs, CCSR_SSI_SOR,
+			CCSR_SSI_SOR_RX_CLR, CCSR_SSI_SOR_RX_CLR);
+	} else {
+		regmap_update_bits(ssi_private->regs, CCSR_SSI_SOR,
+			CCSR_SSI_SOR_TX_CLR, CCSR_SSI_SOR_TX_CLR);
+	}
+}
+
+/*
  * Calculate the bits that have to be disabled for the current stream that is
  * getting disabled. This keeps the bits enabled that are necessary for the
  * second stream to work if 'stream_active' is true.
@@ -474,9 +496,11 @@
 	 * (online configuration)
 	 */
 	if (enable) {
-		regmap_update_bits(regs, CCSR_SSI_SIER, vals->sier, vals->sier);
+		fsl_ssi_fifo_clear(ssi_private, vals->scr & CCSR_SSI_SCR_RE);
+
 		regmap_update_bits(regs, CCSR_SSI_SRCR, vals->srcr, vals->srcr);
 		regmap_update_bits(regs, CCSR_SSI_STCR, vals->stcr, vals->stcr);
+		regmap_update_bits(regs, CCSR_SSI_SIER, vals->sier, vals->sier);
 	} else {
 		u32 sier;
 		u32 srcr;
@@ -506,8 +530,40 @@
 
 config_done:
 	/* Enabling of subunits is done after configuration */
-	if (enable)
+	if (enable) {
+		if (ssi_private->use_dma && (vals->scr & CCSR_SSI_SCR_TE)) {
+			/*
+			 * Be sure the Tx FIFO is filled when TE is set.
+			 * Otherwise, there are some chances to start the
+			 * playback with some void samples inserted first,
+			 * generating a channel slip.
+			 *
+			 * First, SSIEN must be set, to let the FIFO be filled.
+			 *
+			 * Notes:
+			 * - Limit this fix to the DMA case until FIQ cases can
+			 *   be tested.
+			 * - Limit the length of the busy loop to not lock the
+			 *   system too long, even if 1-2 loops are sufficient
+			 *   in general.
+			 */
+			int i;
+			int max_loop = 100;
+			regmap_update_bits(regs, CCSR_SSI_SCR,
+					CCSR_SSI_SCR_SSIEN, CCSR_SSI_SCR_SSIEN);
+			for (i = 0; i < max_loop; i++) {
+				u32 sfcsr;
+				regmap_read(regs, CCSR_SSI_SFCSR, &sfcsr);
+				if (CCSR_SSI_SFCSR_TFCNT0(sfcsr))
+					break;
+			}
+			if (i == max_loop) {
+				dev_err(ssi_private->dev,
+					"Timeout waiting TX FIFO filling\n");
+			}
+		}
 		regmap_update_bits(regs, CCSR_SSI_SCR, vals->scr, vals->scr);
+	}
 }
 
 
@@ -670,6 +726,15 @@
 	if (IS_ERR(ssi_private->baudclk))
 		return -EINVAL;
 
+	/*
+	 * Hardware limitation: The bclk rate must be
+	 * never greater than 1/5 IPG clock rate
+	 */
+	if (freq * 5 > clk_get_rate(ssi_private->clk)) {
+		dev_err(cpu_dai->dev, "bitclk > ipgclk/5\n");
+		return -EINVAL;
+	}
+
 	baudclk_is_used = ssi_private->baudclk_streams & ~(BIT(substream->stream));
 
 	/* It should be already enough to divide clock by setting pm alone */
@@ -686,13 +751,6 @@
 		else
 			clkrate = clk_round_rate(ssi_private->baudclk, tmprate);
 
-		/*
-		 * Hardware limitation: The bclk rate must be
-		 * never greater than 1/5 IPG clock rate
-		 */
-		if (clkrate * 5 > clk_get_rate(ssi_private->clk))
-			continue;
-
 		clkrate /= factor;
 		afreq = clkrate / (i + 1);
 
@@ -1158,14 +1216,14 @@
 	.playback = {
 		.stream_name = "CPU-Playback",
 		.channels_min = 1,
-		.channels_max = 2,
+		.channels_max = 32,
 		.rates = FSLSSI_I2S_RATES,
 		.formats = FSLSSI_I2S_FORMATS,
 	},
 	.capture = {
 		.stream_name = "CPU-Capture",
 		.channels_min = 1,
-		.channels_max = 2,
+		.channels_max = 32,
 		.rates = FSLSSI_I2S_RATES,
 		.formats = FSLSSI_I2S_FORMATS,
 	},
@@ -1402,6 +1460,7 @@
 	}
 
 	ssi_private->soc = of_id->data;
+	ssi_private->dev = &pdev->dev;
 
 	sprop = of_get_property(np, "fsl,mode", NULL);
 	if (sprop) {
diff --git a/sound/soc/fsl/imx-pcm-fiq.c b/sound/soc/fsl/imx-pcm-fiq.c
index e63cd5e..dac6688 100644
--- a/sound/soc/fsl/imx-pcm-fiq.c
+++ b/sound/soc/fsl/imx-pcm-fiq.c
@@ -220,7 +220,7 @@
 	ret = dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
 			  runtime->dma_addr, runtime->dma_bytes);
 
-	pr_debug("%s: ret: %d %p %pad 0x%08x\n", __func__, ret,
+	pr_debug("%s: ret: %d %p %pad 0x%08zx\n", __func__, ret,
 			runtime->dma_area,
 			&runtime->dma_addr,
 			runtime->dma_bytes);
diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index 1120f4f..91c15ab 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -58,6 +58,21 @@
 	  Say Y if you have such a device
 	  If unsure select "N".
 
+config SND_SOC_INTEL_BXT_RT298_MACH
+	tristate "ASoC Audio driver for Broxton with RT298 I2S mode"
+	depends on X86 && ACPI && I2C
+	select SND_SOC_INTEL_SST
+	select SND_SOC_INTEL_SKYLAKE
+	select SND_SOC_RT298
+	select SND_SOC_DMIC
+	select SND_SOC_HDAC_HDMI
+	select SND_HDA_DSP_LOADER
+	help
+	   This adds support for ASoC machine driver for Broxton platforms
+	   with RT298 I2S audio codec.
+	   Say Y if you have such a device
+	   If unsure select "N".
+
 config SND_SOC_INTEL_BYT_RT5640_MACH
 	tristate "ASoC Audio driver for Intel Baytrail with RT5640 codec"
 	depends on X86_INTEL_LPSS && I2C
@@ -162,6 +177,7 @@
 config SND_SOC_INTEL_SKYLAKE
 	tristate
 	select SND_HDA_EXT_CORE
+	select SND_HDA_DSP_LOADER
 	select SND_SOC_TOPOLOGY
 	select SND_SOC_INTEL_SST
 
diff --git a/sound/soc/intel/atom/sst-atom-controls.c b/sound/soc/intel/atom/sst-atom-controls.c
index b97e6ad..98720a9 100644
--- a/sound/soc/intel/atom/sst-atom-controls.c
+++ b/sound/soc/intel/atom/sst-atom-controls.c
@@ -195,7 +195,7 @@
 
 	if (e->w && e->w->power)
 		ret = sst_send_slot_map(drv);
-	else
+	else if (!e->w)
 		dev_err(&drv->pdev->dev, "Slot control: %s doesn't have DAPM widget!!!\n",
 				kcontrol->id.name);
 	return ret;
diff --git a/sound/soc/intel/boards/Makefile b/sound/soc/intel/boards/Makefile
index 3310c0f..a850677 100644
--- a/sound/soc/intel/boards/Makefile
+++ b/sound/soc/intel/boards/Makefile
@@ -2,6 +2,7 @@
 snd-soc-sst-byt-rt5640-mach-objs := byt-rt5640.o
 snd-soc-sst-byt-max98090-mach-objs := byt-max98090.o
 snd-soc-sst-broadwell-objs := broadwell.o
+snd-soc-sst-bxt-rt298-objs := bxt_rt298.o
 snd-soc-sst-bytcr-rt5640-objs := bytcr_rt5640.o
 snd-soc-sst-bytcr-rt5651-objs := bytcr_rt5651.o
 snd-soc-sst-cht-bsw-rt5672-objs := cht_bsw_rt5672.o
@@ -14,6 +15,7 @@
 obj-$(CONFIG_SND_SOC_INTEL_HASWELL_MACH) += snd-soc-sst-haswell.o
 obj-$(CONFIG_SND_SOC_INTEL_BYT_RT5640_MACH) += snd-soc-sst-byt-rt5640-mach.o
 obj-$(CONFIG_SND_SOC_INTEL_BYT_MAX98090_MACH) += snd-soc-sst-byt-max98090-mach.o
+obj-$(CONFIG_SND_SOC_INTEL_BXT_RT298_MACH) += snd-soc-sst-bxt-rt298.o
 obj-$(CONFIG_SND_SOC_INTEL_BROADWELL_MACH) += snd-soc-sst-broadwell.o
 obj-$(CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH) += snd-soc-sst-bytcr-rt5640.o
 obj-$(CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH) += snd-soc-sst-bytcr-rt5651.o
diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c
index 3f8a1e1..7486a00 100644
--- a/sound/soc/intel/boards/broadwell.c
+++ b/sound/soc/intel/boards/broadwell.c
@@ -201,7 +201,7 @@
 	{
 		/* SSP0 - Codec */
 		.name = "Codec",
-		.be_id = 0,
+		.id = 0,
 		.cpu_dai_name = "snd-soc-dummy-dai",
 		.platform_name = "snd-soc-dummy",
 		.no_pcm = 1,
diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c
new file mode 100644
index 0000000..f478751
--- /dev/null
+++ b/sound/soc/intel/boards/bxt_rt298.c
@@ -0,0 +1,353 @@
+/*
+ * Intel Broxton-P I2S Machine Driver
+ *
+ * Copyright (C) 2014-2016, Intel Corporation. All rights reserved.
+ *
+ * Modified from:
+ *   Intel Skylake I2S Machine driver
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/jack.h>
+#include <sound/pcm_params.h>
+#include "../../codecs/hdac_hdmi.h"
+#include "../../codecs/rt298.h"
+
+static struct snd_soc_jack broxton_headset;
+/* Headset jack detection DAPM pins */
+
+enum {
+	BXT_DPCM_AUDIO_PB = 0,
+	BXT_DPCM_AUDIO_CP,
+	BXT_DPCM_AUDIO_REF_CP,
+	BXT_DPCM_AUDIO_HDMI1_PB,
+	BXT_DPCM_AUDIO_HDMI2_PB,
+	BXT_DPCM_AUDIO_HDMI3_PB,
+};
+
+static struct snd_soc_jack_pin broxton_headset_pins[] = {
+	{
+		.pin = "Mic Jack",
+		.mask = SND_JACK_MICROPHONE,
+	},
+	{
+		.pin = "Headphone Jack",
+		.mask = SND_JACK_HEADPHONE,
+	},
+};
+
+static const struct snd_kcontrol_new broxton_controls[] = {
+	SOC_DAPM_PIN_SWITCH("Speaker"),
+	SOC_DAPM_PIN_SWITCH("Headphone Jack"),
+	SOC_DAPM_PIN_SWITCH("Mic Jack"),
+};
+
+static const struct snd_soc_dapm_widget broxton_widgets[] = {
+	SND_SOC_DAPM_HP("Headphone Jack", NULL),
+	SND_SOC_DAPM_SPK("Speaker", NULL),
+	SND_SOC_DAPM_MIC("Mic Jack", NULL),
+	SND_SOC_DAPM_MIC("DMIC2", NULL),
+	SND_SOC_DAPM_MIC("SoC DMIC", NULL),
+	SND_SOC_DAPM_SPK("HDMI1", NULL),
+	SND_SOC_DAPM_SPK("HDMI2", NULL),
+	SND_SOC_DAPM_SPK("HDMI3", NULL),
+};
+
+static const struct snd_soc_dapm_route broxton_rt298_map[] = {
+	/* speaker */
+	{"Speaker", NULL, "SPOR"},
+	{"Speaker", NULL, "SPOL"},
+
+	/* HP jack connectors - unknown if we have jack detect */
+	{"Headphone Jack", NULL, "HPO Pin"},
+
+	/* other jacks */
+	{"MIC1", NULL, "Mic Jack"},
+
+	/* digital mics */
+	{"DMIC1 Pin", NULL, "DMIC2"},
+	{"DMic", NULL, "SoC DMIC"},
+
+	{"HDMI1", NULL, "hif5 Output"},
+	{"HDMI2", NULL, "hif6 Output"},
+	{"HDMI3", NULL, "hif7 Output"},
+
+	/* CODEC BE connections */
+	{ "AIF1 Playback", NULL, "ssp5 Tx"},
+	{ "ssp5 Tx", NULL, "codec0_out"},
+
+	{ "codec0_in", NULL, "ssp5 Rx" },
+	{ "ssp5 Rx", NULL, "AIF1 Capture" },
+
+	{ "dmic01_hifi", NULL, "DMIC01 Rx" },
+	{ "DMIC01 Rx", NULL, "Capture" },
+
+	{ "hifi3", NULL, "iDisp3 Tx"},
+	{ "iDisp3 Tx", NULL, "iDisp3_out"},
+	{ "hifi2", NULL, "iDisp2 Tx"},
+	{ "iDisp2 Tx", NULL, "iDisp2_out"},
+	{ "hifi1", NULL, "iDisp1 Tx"},
+	{ "iDisp1 Tx", NULL, "iDisp1_out"},
+
+};
+
+static int broxton_rt298_codec_init(struct snd_soc_pcm_runtime *rtd)
+{
+	struct snd_soc_codec *codec = rtd->codec;
+	int ret = 0;
+
+	ret = snd_soc_card_jack_new(rtd->card, "Headset",
+		SND_JACK_HEADSET | SND_JACK_BTN_0,
+		&broxton_headset,
+		broxton_headset_pins, ARRAY_SIZE(broxton_headset_pins));
+
+	if (ret)
+		return ret;
+
+	rt298_mic_detect(codec, &broxton_headset);
+	return 0;
+}
+
+static int broxton_hdmi_init(struct snd_soc_pcm_runtime *rtd)
+{
+	struct snd_soc_dai *dai = rtd->codec_dai;
+
+	return hdac_hdmi_jack_init(dai, BXT_DPCM_AUDIO_HDMI1_PB + dai->id);
+}
+
+static int broxton_ssp5_fixup(struct snd_soc_pcm_runtime *rtd,
+			struct snd_pcm_hw_params *params)
+{
+	struct snd_interval *rate = hw_param_interval(params,
+					SNDRV_PCM_HW_PARAM_RATE);
+	struct snd_interval *channels = hw_param_interval(params,
+					SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+
+	/* The ADSP will convert the FE rate to 48k, stereo */
+	rate->min = rate->max = 48000;
+	channels->min = channels->max = 2;
+
+	/* set SSP5 to 24 bit */
+	snd_mask_none(fmt);
+	snd_mask_set(fmt, SNDRV_PCM_FORMAT_S24_LE);
+
+	return 0;
+}
+
+static int broxton_rt298_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->codec_dai;
+	int ret;
+
+	ret = snd_soc_dai_set_sysclk(codec_dai, RT298_SCLK_S_PLL,
+					19200000, SND_SOC_CLOCK_IN);
+	if (ret < 0) {
+		dev_err(rtd->dev, "can't set codec sysclk configuration\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static struct snd_soc_ops broxton_rt298_ops = {
+	.hw_params = broxton_rt298_hw_params,
+};
+
+/*
+ * broxton digital audio interface glue - connects codec <--> CPU
+ *
+ * The front-end entries are placed at their BXT_DPCM_* enum index; the
+ * back-end entries follow in declaration order and carry an explicit .id.
+ * Front ends are bound to the dummy codec, back ends set .no_pcm.
+ */
+static struct snd_soc_dai_link broxton_rt298_dais[] = {
+	/* Front End DAI links */
+	[BXT_DPCM_AUDIO_PB] = {
+		.name = "Bxt Audio Port",
+		.stream_name = "Audio",
+		.cpu_dai_name = "System Pin",
+		.platform_name = "0000:00:0e.0",
+		.nonatomic = 1,
+		.dynamic = 1,
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST},
+		.dpcm_playback = 1,
+	},
+	[BXT_DPCM_AUDIO_CP] = {
+		.name = "Bxt Audio Capture Port",
+		.stream_name = "Audio Record",
+		.cpu_dai_name = "System Pin",
+		.platform_name = "0000:00:0e.0",
+		.nonatomic = 1,
+		.dynamic = 1,
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST},
+		.dpcm_capture = 1,
+	},
+	[BXT_DPCM_AUDIO_REF_CP] = {
+		.name = "Bxt Audio Reference cap",
+		.stream_name = "refcap",
+		.cpu_dai_name = "Reference Pin",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.platform_name = "0000:00:0e.0",
+		.init = NULL,
+		.dpcm_capture = 1,
+		.nonatomic = 1,
+		.dynamic = 1,
+	},
+	[BXT_DPCM_AUDIO_HDMI1_PB] = {
+		.name = "Bxt HDMI Port1",
+		.stream_name = "Hdmi1",
+		.cpu_dai_name = "HDMI1 Pin",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.platform_name = "0000:00:0e.0",
+		.dpcm_playback = 1,
+		.init = NULL,
+		.nonatomic = 1,
+		.dynamic = 1,
+	},
+	[BXT_DPCM_AUDIO_HDMI2_PB] = {
+		.name = "Bxt HDMI Port2",
+		.stream_name = "Hdmi2",
+		.cpu_dai_name = "HDMI2 Pin",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.platform_name = "0000:00:0e.0",
+		.dpcm_playback = 1,
+		.init = NULL,
+		.nonatomic = 1,
+		.dynamic = 1,
+	},
+	[BXT_DPCM_AUDIO_HDMI3_PB] = {
+		.name = "Bxt HDMI Port3",
+		.stream_name = "Hdmi3",
+		.cpu_dai_name = "HDMI3 Pin",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.platform_name = "0000:00:0e.0",
+		.dpcm_playback = 1,
+		.init = NULL,
+		.nonatomic = 1,
+		.dynamic = 1,
+	},
+	/* Back End DAI links */
+	{
+		/* SSP5 - Codec */
+		.name = "SSP5-Codec",
+		.id = 0,
+		.cpu_dai_name = "SSP5 Pin",
+		.platform_name = "0000:00:0e.0",
+		.no_pcm = 1,
+		.codec_name = "i2c-INT343A:00",
+		.codec_dai_name = "rt298-aif1",
+		.init = broxton_rt298_codec_init,
+		.dai_fmt = SND_SOC_DAIFMT_DSP_A | SND_SOC_DAIFMT_NB_NF |
+						SND_SOC_DAIFMT_CBS_CFS,
+		.ignore_pmdown_time = 1,
+		.be_hw_params_fixup = broxton_ssp5_fixup,
+		.ops = &broxton_rt298_ops,
+		.dpcm_playback = 1,
+		.dpcm_capture = 1,
+	},
+	{
+		.name = "dmic01",
+		.id = 1,
+		.cpu_dai_name = "DMIC01 Pin",
+		.codec_name = "dmic-codec",
+		.codec_dai_name = "dmic-hifi",
+		.platform_name = "0000:00:0e.0",
+		.ignore_suspend = 1,
+		.dpcm_capture = 1,
+		.no_pcm = 1,
+	},
+	{
+		.name = "iDisp1",
+		.id = 3,
+		.cpu_dai_name = "iDisp1 Pin",
+		.codec_name = "ehdaudio0D2",
+		.codec_dai_name = "intel-hdmi-hifi1",
+		.platform_name = "0000:00:0e.0",
+		.init = broxton_hdmi_init,
+		.dpcm_playback = 1,
+		.no_pcm = 1,
+	},
+	{
+		.name = "iDisp2",
+		.id = 4,
+		.cpu_dai_name = "iDisp2 Pin",
+		.codec_name = "ehdaudio0D2",
+		.codec_dai_name = "intel-hdmi-hifi2",
+		.platform_name = "0000:00:0e.0",
+		.init = broxton_hdmi_init,
+		.dpcm_playback = 1,
+		.no_pcm = 1,
+	},
+	{
+		.name = "iDisp3",
+		.id = 5,
+		.cpu_dai_name = "iDisp3 Pin",
+		.codec_name = "ehdaudio0D2",
+		.codec_dai_name = "intel-hdmi-hifi3",
+		.platform_name = "0000:00:0e.0",
+		.init = broxton_hdmi_init,
+		.dpcm_playback = 1,
+		.no_pcm = 1,
+	},
+};
+
+/*
+ * broxton audio machine driver for SPT + RT298S
+ *
+ * Card description tying together the DAI links, mixer controls, DAPM
+ * widgets and DAPM routes declared in this file. The .dev field is filled
+ * in at probe time.
+ */
+static struct snd_soc_card broxton_rt298 = {
+	.name = "broxton-rt298",
+	.owner = THIS_MODULE,
+	.dai_link = broxton_rt298_dais,
+	.num_links = ARRAY_SIZE(broxton_rt298_dais),
+	.controls = broxton_controls,
+	.num_controls = ARRAY_SIZE(broxton_controls),
+	.dapm_widgets = broxton_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(broxton_widgets),
+	.dapm_routes = broxton_rt298_map,
+	.num_dapm_routes = ARRAY_SIZE(broxton_rt298_map),
+	.fully_routed = true,
+};
+
+/*
+ * Platform probe: bind the static card to this platform device and register
+ * it with device-managed lifetime. Returns the registration result.
+ */
+static int broxton_audio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	broxton_rt298.dev = dev;
+
+	return devm_snd_soc_register_card(dev, &broxton_rt298);
+}
+
+/*
+ * Platform driver matched by the name "bxt_alc298s_i2s" (see also the
+ * MODULE_ALIAS below). No remove callback: the card is registered through
+ * the devm_ variant in probe.
+ */
+static struct platform_driver broxton_audio = {
+	.probe = broxton_audio_probe,
+	.driver = {
+		.name = "bxt_alc298s_i2s",
+	},
+};
+module_platform_driver(broxton_audio)
+
+/* Module information */
+MODULE_AUTHOR("Ramesh Babu <Ramesh.Babu@intel.com>");
+MODULE_AUTHOR("Senthilnathan Veppur <senthilnathanx.veppur@intel.com>");
+MODULE_DESCRIPTION("Intel SST Audio for Broxton");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:bxt_alc298s_i2s");
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 032a2e7..88efb62 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -304,7 +304,7 @@
 		/* back ends */
 	{
 		.name = "SSP2-Codec",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "ssp2-port",
 		.platform_name = "sst-mfld-platform",
 		.no_pcm = 1,
diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
index 1c95ccc..35f591e 100644
--- a/sound/soc/intel/boards/bytcr_rt5651.c
+++ b/sound/soc/intel/boards/bytcr_rt5651.c
@@ -267,7 +267,7 @@
 	/* back ends */
 	{
 		.name = "SSP2-Codec",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "ssp2-port",
 		.platform_name = "sst-mfld-platform",
 		.no_pcm = 1,
diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index ac60b04..cdcced9 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -255,7 +255,7 @@
 	/* back ends */
 	{
 		.name = "SSP2-Codec",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "ssp2-port",
 		.platform_name = "sst-mfld-platform",
 		.no_pcm = 1,
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index 3f2c1ea..d7ef292 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -295,7 +295,7 @@
 	/* back ends */
 	{
 		.name = "SSP2-Codec",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "ssp2-port",
 		.platform_name = "sst-mfld-platform",
 		.no_pcm = 1,
diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c
index 2e5347f..df9d254 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5672.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5672.c
@@ -273,7 +273,7 @@
 	{
 		/* SSP2 - Codec */
 		.name = "SSP2-Codec",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "ssp2-port",
 		.platform_name = "sst-mfld-platform",
 		.no_pcm = 1,
diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c
index 2255857..863f1d5 100644
--- a/sound/soc/intel/boards/haswell.c
+++ b/sound/soc/intel/boards/haswell.c
@@ -156,7 +156,7 @@
 	{
 		/* SSP0 - Codec */
 		.name = "Codec",
-		.be_id = 0,
+		.id = 0,
 		.cpu_dai_name = "snd-soc-dummy-dai",
 		.platform_name = "snd-soc-dummy",
 		.no_pcm = 1,
diff --git a/sound/soc/intel/boards/skl_nau88l25_max98357a.c b/sound/soc/intel/boards/skl_nau88l25_max98357a.c
index 72176b7..d280865 100644
--- a/sound/soc/intel/boards/skl_nau88l25_max98357a.c
+++ b/sound/soc/intel/boards/skl_nau88l25_max98357a.c
@@ -30,6 +30,16 @@
 static struct snd_soc_jack skylake_headset;
 static struct snd_soc_card skylake_audio_card;
 
+/* One HDMI PCM recorded by an HDMI link init callback for later jack setup. */
+struct skl_hdmi_pcm {
+	struct list_head head;		/* entry in hdmi_pcm_list */
+	struct snd_soc_dai *codec_dai;	/* codec DAI of the HDMI runtime */
+	int device;			/* device number given to hdac_hdmi_jack_init() */
+};
+
+/* Card driver data: the HDMI PCMs collected while the links are initialised. */
+struct skl_nau8825_private {
+	struct list_head hdmi_pcm_list;
+};
+
 enum {
 	SKL_DPCM_AUDIO_PB = 0,
 	SKL_DPCM_AUDIO_CP,
@@ -192,23 +202,56 @@
 
 static int skylake_hdmi1_init(struct snd_soc_pcm_runtime *rtd)
 {
+	struct skl_nau8825_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_dai *dai = rtd->codec_dai;
+	struct skl_hdmi_pcm *pcm;
 
-	return hdac_hdmi_jack_init(dai, SKL_DPCM_AUDIO_HDMI1_PB);
+	pcm = devm_kzalloc(rtd->card->dev, sizeof(*pcm), GFP_KERNEL);
+	if (!pcm)
+		return -ENOMEM;
+
+	pcm->device = SKL_DPCM_AUDIO_HDMI1_PB;
+	pcm->codec_dai = dai;
+
+	list_add_tail(&pcm->head, &ctx->hdmi_pcm_list);
+
+	return 0;
 }
 
 static int skylake_hdmi2_init(struct snd_soc_pcm_runtime *rtd)
 {
+	struct skl_nau8825_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_dai *dai = rtd->codec_dai;
+	struct skl_hdmi_pcm *pcm;
 
-	return hdac_hdmi_jack_init(dai, SKL_DPCM_AUDIO_HDMI2_PB);
+	pcm = devm_kzalloc(rtd->card->dev, sizeof(*pcm), GFP_KERNEL);
+	if (!pcm)
+		return -ENOMEM;
+
+	pcm->device = SKL_DPCM_AUDIO_HDMI2_PB;
+	pcm->codec_dai = dai;
+
+	list_add_tail(&pcm->head, &ctx->hdmi_pcm_list);
+
+	return 0;
 }
 
 static int skylake_hdmi3_init(struct snd_soc_pcm_runtime *rtd)
 {
+	struct skl_nau8825_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_dai *dai = rtd->codec_dai;
+	struct skl_hdmi_pcm *pcm;
 
-	return hdac_hdmi_jack_init(dai, SKL_DPCM_AUDIO_HDMI3_PB);
+	pcm = devm_kzalloc(rtd->card->dev, sizeof(*pcm), GFP_KERNEL);
+	if (!pcm)
+		return -ENOMEM;
+
+	pcm->device = SKL_DPCM_AUDIO_HDMI3_PB;
+	pcm->codec_dai = dai;
+
+	list_add_tail(&pcm->head, &ctx->hdmi_pcm_list);
+
+	return 0;
 }
 
 static int skylake_nau8825_fe_init(struct snd_soc_pcm_runtime *rtd)
@@ -391,7 +434,6 @@
 		.platform_name = "0000:00:1f.3",
 		.init = NULL,
 		.dpcm_capture = 1,
-		.ignore_suspend = 1,
 		.nonatomic = 1,
 		.dynamic = 1,
 		.ops = &skylaye_refcap_ops,
@@ -456,7 +498,7 @@
 	{
 		/* SSP0 - Codec */
 		.name = "SSP0-Codec",
-		.be_id = 0,
+		.id = 0,
 		.cpu_dai_name = "SSP0 Pin",
 		.platform_name = "0000:00:1f.3",
 		.no_pcm = 1,
@@ -472,7 +514,7 @@
 	{
 		/* SSP1 - Codec */
 		.name = "SSP1-Codec",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "SSP1 Pin",
 		.platform_name = "0000:00:1f.3",
 		.no_pcm = 1,
@@ -489,7 +531,7 @@
 	},
 	{
 		.name = "dmic01",
-		.be_id = 2,
+		.id = 2,
 		.cpu_dai_name = "DMIC01 Pin",
 		.codec_name = "dmic-codec",
 		.codec_dai_name = "dmic-hifi",
@@ -501,7 +543,7 @@
 	},
 	{
 		.name = "iDisp1",
-		.be_id = 3,
+		.id = 3,
 		.cpu_dai_name = "iDisp1 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi1",
@@ -512,7 +554,7 @@
 	},
 	{
 		.name = "iDisp2",
-		.be_id = 4,
+		.id = 4,
 		.cpu_dai_name = "iDisp2 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi2",
@@ -523,7 +565,7 @@
 	},
 	{
 		.name = "iDisp3",
-		.be_id = 5,
+		.id = 5,
 		.cpu_dai_name = "iDisp3 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi3",
@@ -534,6 +576,21 @@
 	},
 };
 
+/*
+ * Card late_probe hook: call hdac_hdmi_jack_init() for every HDMI PCM queued
+ * on hdmi_pcm_list. Stops at, and returns, the first negative result;
+ * returns 0 otherwise.
+ */
+static int skylake_card_late_probe(struct snd_soc_card *card)
+{
+	struct skl_nau8825_private *priv = snd_soc_card_get_drvdata(card);
+	struct skl_hdmi_pcm *entry;
+
+	list_for_each_entry(entry, &priv->hdmi_pcm_list, head) {
+		int ret = hdac_hdmi_jack_init(entry->codec_dai, entry->device);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* skylake audio machine driver for SPT + NAU88L25 */
 static struct snd_soc_card skylake_audio_card = {
 	.name = "sklnau8825max",
@@ -547,11 +604,21 @@
 	.dapm_routes = skylake_map,
 	.num_dapm_routes = ARRAY_SIZE(skylake_map),
 	.fully_routed = true,
+	.late_probe = skylake_card_late_probe,
 };
 
 static int skylake_audio_probe(struct platform_device *pdev)
 {
+	struct skl_nau8825_private *ctx;
+
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	if (!ctx)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ctx->hdmi_pcm_list);
+
 	skylake_audio_card.dev = &pdev->dev;
+	snd_soc_card_set_drvdata(&skylake_audio_card, ctx);
 
 	return devm_snd_soc_register_card(&pdev->dev, &skylake_audio_card);
 }
diff --git a/sound/soc/intel/boards/skl_nau88l25_ssm4567.c b/sound/soc/intel/boards/skl_nau88l25_ssm4567.c
index 5f1ca99..e19aa99 100644
--- a/sound/soc/intel/boards/skl_nau88l25_ssm4567.c
+++ b/sound/soc/intel/boards/skl_nau88l25_ssm4567.c
@@ -34,6 +34,15 @@
 static struct snd_soc_jack skylake_headset;
 static struct snd_soc_card skylake_audio_card;
 
+/* One HDMI PCM recorded by an HDMI link init callback for later jack setup. */
+struct skl_hdmi_pcm {
+	struct list_head head;		/* entry in hdmi_pcm_list */
+	struct snd_soc_dai *codec_dai;	/* codec DAI of the HDMI runtime */
+	int device;			/* device number given to hdac_hdmi_jack_init() */
+};
+
+/* Card driver data: the HDMI PCMs collected while the links are initialised. */
+struct skl_nau88125_private {
+	struct list_head hdmi_pcm_list;
+};
 enum {
 	SKL_DPCM_AUDIO_PB = 0,
 	SKL_DPCM_AUDIO_CP,
@@ -222,24 +231,57 @@
 
 static int skylake_hdmi1_init(struct snd_soc_pcm_runtime *rtd)
 {
+	struct skl_nau88125_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_dai *dai = rtd->codec_dai;
+	struct skl_hdmi_pcm *pcm;
 
-	return hdac_hdmi_jack_init(dai, SKL_DPCM_AUDIO_HDMI1_PB);
+	pcm = devm_kzalloc(rtd->card->dev, sizeof(*pcm), GFP_KERNEL);
+	if (!pcm)
+		return -ENOMEM;
+
+	pcm->device = SKL_DPCM_AUDIO_HDMI1_PB;
+	pcm->codec_dai = dai;
+
+	list_add_tail(&pcm->head, &ctx->hdmi_pcm_list);
+
+	return 0;
 }
 
 static int skylake_hdmi2_init(struct snd_soc_pcm_runtime *rtd)
 {
+	struct skl_nau88125_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_dai *dai = rtd->codec_dai;
+	struct skl_hdmi_pcm *pcm;
 
-	return hdac_hdmi_jack_init(dai, SKL_DPCM_AUDIO_HDMI2_PB);
+	pcm = devm_kzalloc(rtd->card->dev, sizeof(*pcm), GFP_KERNEL);
+	if (!pcm)
+		return -ENOMEM;
+
+	pcm->device = SKL_DPCM_AUDIO_HDMI2_PB;
+	pcm->codec_dai = dai;
+
+	list_add_tail(&pcm->head, &ctx->hdmi_pcm_list);
+
+	return 0;
 }
 
 
 static int skylake_hdmi3_init(struct snd_soc_pcm_runtime *rtd)
 {
+	struct skl_nau88125_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_dai *dai = rtd->codec_dai;
+	struct skl_hdmi_pcm *pcm;
 
-	return hdac_hdmi_jack_init(dai, SKL_DPCM_AUDIO_HDMI3_PB);
+	pcm = devm_kzalloc(rtd->card->dev, sizeof(*pcm), GFP_KERNEL);
+	if (!pcm)
+		return -ENOMEM;
+
+	pcm->device = SKL_DPCM_AUDIO_HDMI3_PB;
+	pcm->codec_dai = dai;
+
+	list_add_tail(&pcm->head, &ctx->hdmi_pcm_list);
+
+	return 0;
 }
 
 static int skylake_nau8825_fe_init(struct snd_soc_pcm_runtime *rtd)
@@ -440,7 +482,6 @@
 		.platform_name = "0000:00:1f.3",
 		.init = NULL,
 		.dpcm_capture = 1,
-		.ignore_suspend = 1,
 		.nonatomic = 1,
 		.dynamic = 1,
 		.ops = &skylaye_refcap_ops,
@@ -505,7 +546,7 @@
 	{
 		/* SSP0 - Codec */
 		.name = "SSP0-Codec",
-		.be_id = 0,
+		.id = 0,
 		.cpu_dai_name = "SSP0 Pin",
 		.platform_name = "0000:00:1f.3",
 		.no_pcm = 1,
@@ -523,7 +564,7 @@
 	{
 		/* SSP1 - Codec */
 		.name = "SSP1-Codec",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "SSP1 Pin",
 		.platform_name = "0000:00:1f.3",
 		.no_pcm = 1,
@@ -540,7 +581,7 @@
 	},
 	{
 		.name = "dmic01",
-		.be_id = 2,
+		.id = 2,
 		.cpu_dai_name = "DMIC01 Pin",
 		.codec_name = "dmic-codec",
 		.codec_dai_name = "dmic-hifi",
@@ -552,7 +593,7 @@
 	},
 	{
 		.name = "iDisp1",
-		.be_id = 3,
+		.id = 3,
 		.cpu_dai_name = "iDisp1 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi1",
@@ -563,7 +604,7 @@
 	},
 	{
 		.name = "iDisp2",
-		.be_id = 4,
+		.id = 4,
 		.cpu_dai_name = "iDisp2 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi2",
@@ -574,7 +615,7 @@
 	},
 	{
 		.name = "iDisp3",
-		.be_id = 5,
+		.id = 5,
 		.cpu_dai_name = "iDisp3 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi3",
@@ -585,6 +626,21 @@
 	},
 };
 
+/*
+ * Card late_probe hook: call hdac_hdmi_jack_init() for every HDMI PCM queued
+ * on hdmi_pcm_list. Stops at, and returns, the first negative result;
+ * returns 0 otherwise.
+ */
+static int skylake_card_late_probe(struct snd_soc_card *card)
+{
+	struct skl_nau88125_private *priv = snd_soc_card_get_drvdata(card);
+	struct skl_hdmi_pcm *entry;
+
+	list_for_each_entry(entry, &priv->hdmi_pcm_list, head) {
+		int ret = hdac_hdmi_jack_init(entry->codec_dai, entry->device);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* skylake audio machine driver for SPT + NAU88L25 */
 static struct snd_soc_card skylake_audio_card = {
 	.name = "sklnau8825adi",
@@ -600,11 +656,21 @@
 	.codec_conf = ssm4567_codec_conf,
 	.num_configs = ARRAY_SIZE(ssm4567_codec_conf),
 	.fully_routed = true,
+	.late_probe = skylake_card_late_probe,
 };
 
 static int skylake_audio_probe(struct platform_device *pdev)
 {
+	struct skl_nau88125_private *ctx;
+
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	if (!ctx)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ctx->hdmi_pcm_list);
+
 	skylake_audio_card.dev = &pdev->dev;
+	snd_soc_card_set_drvdata(&skylake_audio_card, ctx);
 
 	return devm_snd_soc_register_card(&pdev->dev, &skylake_audio_card);
 }
diff --git a/sound/soc/intel/boards/skl_rt286.c b/sound/soc/intel/boards/skl_rt286.c
index 2016397a..426b482 100644
--- a/sound/soc/intel/boards/skl_rt286.c
+++ b/sound/soc/intel/boards/skl_rt286.c
@@ -30,6 +30,16 @@
 
 static struct snd_soc_jack skylake_headset;
 
+/* One HDMI PCM recorded by the HDMI link init callback for later jack setup. */
+struct skl_hdmi_pcm {
+	struct list_head head;		/* entry in hdmi_pcm_list */
+	struct snd_soc_dai *codec_dai;	/* codec DAI of the HDMI runtime */
+	int device;			/* device number given to hdac_hdmi_jack_init() */
+};
+
+/* Card driver data: the HDMI PCMs collected while the links are initialised. */
+struct skl_rt286_private {
+	struct list_head hdmi_pcm_list;
+};
+
 enum {
 	SKL_DPCM_AUDIO_PB = 0,
 	SKL_DPCM_AUDIO_CP,
@@ -142,9 +152,20 @@
 
 static int skylake_hdmi_init(struct snd_soc_pcm_runtime *rtd)
 {
+	struct skl_rt286_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_dai *dai = rtd->codec_dai;
+	struct skl_hdmi_pcm *pcm;
 
-	return hdac_hdmi_jack_init(dai, SKL_DPCM_AUDIO_HDMI1_PB + dai->id);
+	pcm = devm_kzalloc(rtd->card->dev, sizeof(*pcm), GFP_KERNEL);
+	if (!pcm)
+		return -ENOMEM;
+
+	pcm->device = SKL_DPCM_AUDIO_HDMI1_PB + dai->id;
+	pcm->codec_dai = dai;
+
+	list_add_tail(&pcm->head, &ctx->hdmi_pcm_list);
+
+	return 0;
 }
 
 static unsigned int rates[] = {
@@ -317,7 +338,6 @@
 		.platform_name = "0000:00:1f.3",
 		.init = NULL,
 		.dpcm_capture = 1,
-		.ignore_suspend = 1,
 		.nonatomic = 1,
 		.dynamic = 1,
 	},
@@ -375,7 +395,7 @@
 	{
 		/* SSP0 - Codec */
 		.name = "SSP0-Codec",
-		.be_id = 0,
+		.id = 0,
 		.cpu_dai_name = "SSP0 Pin",
 		.platform_name = "0000:00:1f.3",
 		.no_pcm = 1,
@@ -393,7 +413,7 @@
 	},
 	{
 		.name = "dmic01",
-		.be_id = 1,
+		.id = 1,
 		.cpu_dai_name = "DMIC01 Pin",
 		.codec_name = "dmic-codec",
 		.codec_dai_name = "dmic-hifi",
@@ -405,7 +425,7 @@
 	},
 	{
 		.name = "iDisp1",
-		.be_id = 2,
+		.id = 2,
 		.cpu_dai_name = "iDisp1 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi1",
@@ -416,7 +436,7 @@
 	},
 	{
 		.name = "iDisp2",
-		.be_id = 3,
+		.id = 3,
 		.cpu_dai_name = "iDisp2 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi2",
@@ -427,7 +447,7 @@
 	},
 	{
 		.name = "iDisp3",
-		.be_id = 4,
+		.id = 4,
 		.cpu_dai_name = "iDisp3 Pin",
 		.codec_name = "ehdaudio0D2",
 		.codec_dai_name = "intel-hdmi-hifi3",
@@ -438,6 +458,21 @@
 	},
 };
 
+/*
+ * Card late_probe hook: call hdac_hdmi_jack_init() for every HDMI PCM queued
+ * on hdmi_pcm_list. Stops at, and returns, the first negative result;
+ * returns 0 otherwise.
+ */
+static int skylake_card_late_probe(struct snd_soc_card *card)
+{
+	struct skl_rt286_private *priv = snd_soc_card_get_drvdata(card);
+	struct skl_hdmi_pcm *entry;
+
+	list_for_each_entry(entry, &priv->hdmi_pcm_list, head) {
+		int ret = hdac_hdmi_jack_init(entry->codec_dai, entry->device);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* skylake audio machine driver for SPT + RT286S */
 static struct snd_soc_card skylake_rt286 = {
 	.name = "skylake-rt286",
@@ -451,11 +486,21 @@
 	.dapm_routes = skylake_rt286_map,
 	.num_dapm_routes = ARRAY_SIZE(skylake_rt286_map),
 	.fully_routed = true,
+	.late_probe = skylake_card_late_probe,
 };
 
 static int skylake_audio_probe(struct platform_device *pdev)
 {
+	struct skl_rt286_private *ctx;
+
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	if (!ctx)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ctx->hdmi_pcm_list);
+
 	skylake_rt286.dev = &pdev->dev;
+	snd_soc_card_set_drvdata(&skylake_rt286, ctx);
 
 	return devm_snd_soc_register_card(&pdev->dev, &skylake_rt286);
 }
diff --git a/sound/soc/intel/common/sst-acpi.h b/sound/soc/intel/common/sst-acpi.h
index 4dcfb7e..8398cb2 100644
--- a/sound/soc/intel/common/sst-acpi.h
+++ b/sound/soc/intel/common/sst-acpi.h
@@ -12,10 +12,19 @@
  *
  */
 
+#include <linux/kconfig.h>
+#include <linux/stddef.h>
 #include <linux/acpi.h>
 
 /* translation fron HID to I2C name, needed for DAI codec_name */
+#if IS_ENABLED(CONFIG_ACPI)
 const char *sst_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN]);
+#else
+/*
+ * Stub used when ACPI support is not built: no HID can be translated, so
+ * always return NULL. Must be static inline because this header is included
+ * from several translation units.
+ */
+static inline const char *
+sst_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN])
+{
+	return NULL;
+}
+#endif
 
 /* acpi match */
 struct sst_acpi_mach *sst_acpi_find_machine(struct sst_acpi_mach *machines);
diff --git a/sound/soc/intel/common/sst-firmware.c b/sound/soc/intel/common/sst-firmware.c
index ef4881e..2599352 100644
--- a/sound/soc/intel/common/sst-firmware.c
+++ b/sound/soc/intel/common/sst-firmware.c
@@ -203,7 +203,7 @@
 
 	chip->dev = dev;
 
-	err = dw_dma_probe(chip, NULL);
+	err = dw_dma_probe(chip);
 	if (err)
 		return ERR_PTR(err);
 
diff --git a/sound/soc/intel/haswell/sst-haswell-pcm.c b/sound/soc/intel/haswell/sst-haswell-pcm.c
index 1aa819c..994256b 100644
--- a/sound/soc/intel/haswell/sst-haswell-pcm.c
+++ b/sound/soc/intel/haswell/sst-haswell-pcm.c
@@ -445,7 +445,7 @@
 
 	pages = snd_sgbuf_aligned_pages(size);
 
-	dev_dbg(rtd->dev, "generating page table for %p size 0x%zu pages %d\n",
+	dev_dbg(rtd->dev, "generating page table for %p size 0x%zx pages %d\n",
 		dma_area, size, pages);
 
 	for (i = 0; i < pages; i++) {
diff --git a/sound/soc/intel/skylake/Makefile b/sound/soc/intel/skylake/Makefile
index 914b6da..c28f5d0 100644
--- a/sound/soc/intel/skylake/Makefile
+++ b/sound/soc/intel/skylake/Makefile
@@ -5,6 +5,6 @@
 
 # Skylake IPC Support
 snd-soc-skl-ipc-objs := skl-sst-ipc.o skl-sst-dsp.o skl-sst-cldma.o \
-		skl-sst.o
+		skl-sst.o bxt-sst.o
 
 obj-$(CONFIG_SND_SOC_INTEL_SKYLAKE) += snd-soc-skl-ipc.o
diff --git a/sound/soc/intel/skylake/bxt-sst.c b/sound/soc/intel/skylake/bxt-sst.c
new file mode 100644
index 0000000..965ce40
--- /dev/null
+++ b/sound/soc/intel/skylake/bxt-sst.c
@@ -0,0 +1,328 @@
+/*
+ *  bxt-sst.c - DSP library functions for BXT platform
+ *
+ *  Copyright (C) 2015-16 Intel Corp
+ *  Author:Rafal Redzimski <rafal.f.redzimski@intel.com>
+ *	   Jeeja KP <jeeja.kp@intel.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/device.h>
+
+#include "../common/sst-dsp.h"
+#include "../common/sst-dsp-priv.h"
+#include "skl-sst-ipc.h"
+
+#define BXT_BASEFW_TIMEOUT	3000
+#define BXT_INIT_TIMEOUT	500
+#define BXT_IPC_PURGE_FW	0x01004000
+
+#define BXT_ROM_INIT		0x5
+#define BXT_ADSP_SRAM0_BASE	0x80000
+
+/* Firmware status window */
+#define BXT_ADSP_FW_STATUS	BXT_ADSP_SRAM0_BASE
+#define BXT_ADSP_ERROR_CODE     (BXT_ADSP_FW_STATUS + 0x4)
+
+#define BXT_ADSP_SRAM1_BASE	0xA0000
+
+/* Read the word at BXT_ADSP_ERROR_CODE, just past the firmware status word. */
+static unsigned int bxt_get_errorcode(struct sst_dsp *ctx)
+{
+	unsigned int code = sst_dsp_shim_read(ctx, BXT_ADSP_ERROR_CODE);
+
+	return code;
+}
+
+/*
+ * Stage the firmware image for DMA and bring the DSP ROM to its init state.
+ *
+ * @ctx:    DSP context
+ * @fwdata: firmware image to copy into the DMA buffer
+ * @fwsize: size of the image in bytes
+ *
+ * Steps: obtain a loader stream and DMA buffer through dsp_ops.prepare(),
+ * copy the image into it, post the purge-firmware request, enable the core,
+ * wait for the HIPCIE done bit, enable IPC interrupts and finally wait for
+ * the firmware status word to report SKL_FW_INIT.
+ *
+ * Returns the (non-negative) skl_dsp_enable_core() result on success, the
+ * negative value from dsp_ops.prepare(), or -EIO when the core cannot be
+ * enabled or the ROM never reaches the init state. On the -EIO paths the
+ * loader stream is cleaned up and the core disabled before returning.
+ */
+static int sst_bxt_prepare_fw(struct sst_dsp *ctx,
+			const void *fwdata, u32 fwsize)
+{
+	int stream_tag, ret, i;
+	u32 reg;
+
+	/* NOTE(review): 0x40 is passed as the stream format; meaning not visible here */
+	stream_tag = ctx->dsp_ops.prepare(ctx->dev, 0x40, fwsize, &ctx->dmab);
+	if (stream_tag < 0) {
+		dev_err(ctx->dev, "Failed to prepare DMA FW loading err: %x\n",
+				stream_tag);
+		return stream_tag;
+	}
+
+	/* remember the tag: the transfer step triggers and cleans up by tag */
+	ctx->dsp_ops.stream_tag = stream_tag;
+	memcpy(ctx->dmab.area, fwdata, fwsize);
+
+	/* Purge FW request */
+	sst_dsp_shim_write(ctx, SKL_ADSP_REG_HIPCI, SKL_ADSP_REG_HIPCI_BUSY |
+					 BXT_IPC_PURGE_FW | (stream_tag - 1));
+
+	ret = skl_dsp_enable_core(ctx);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Boot dsp core failed ret: %d\n", ret);
+		ret = -EIO;
+		goto base_fw_load_failed;
+	}
+
+	/* poll for the done bit: up to BXT_INIT_TIMEOUT rounds of 1 ms each */
+	for (i = BXT_INIT_TIMEOUT; i > 0; --i) {
+		reg = sst_dsp_shim_read(ctx, SKL_ADSP_REG_HIPCIE);
+
+		if (reg & SKL_ADSP_REG_HIPCIE_DONE) {
+			sst_dsp_shim_update_bits_forced(ctx,
+					SKL_ADSP_REG_HIPCIE,
+					SKL_ADSP_REG_HIPCIE_DONE,
+					SKL_ADSP_REG_HIPCIE_DONE);
+			break;
+		}
+		mdelay(1);
+	}
+	/* a missing done bit is only logged; loading carries on regardless */
+	if (!i) {
+		dev_info(ctx->dev, "Waiting for HIPCIE done, reg: 0x%x\n", reg);
+		sst_dsp_shim_update_bits(ctx, SKL_ADSP_REG_HIPCIE,
+				SKL_ADSP_REG_HIPCIE_DONE,
+				SKL_ADSP_REG_HIPCIE_DONE);
+	}
+
+	/* enable Interrupt */
+	skl_ipc_int_enable(ctx);
+	skl_ipc_op_int_enable(ctx);
+
+	/* poll the status word for SKL_FW_INIT with the same time budget */
+	for (i = BXT_INIT_TIMEOUT; i > 0; --i) {
+		if (SKL_FW_INIT ==
+				(sst_dsp_shim_read(ctx, BXT_ADSP_FW_STATUS) &
+				SKL_FW_STS_MASK)) {
+
+			dev_info(ctx->dev, "ROM loaded, continue FW loading\n");
+			break;
+		}
+		mdelay(1);
+	}
+	if (!i) {
+		/* reg still holds the last HIPCIE value read in the loop above */
+		dev_err(ctx->dev, "Timeout for ROM init, HIPCIE: 0x%x\n", reg);
+		ret = -EIO;
+		goto base_fw_load_failed;
+	}
+
+	return ret;
+
+base_fw_load_failed:
+	ctx->dsp_ops.cleanup(ctx->dev, &ctx->dmab, stream_tag);
+	skl_dsp_disable_core(ctx);
+	return ret;
+}
+
+/*
+ * Run the staged firmware transfer: start the loader stream, poll
+ * BXT_ADSP_FW_STATUS (masked with SKL_FW_STS_MASK) against BXT_ROM_INIT for
+ * up to BXT_BASEFW_TIMEOUT, then stop the stream and release it.
+ * Returns the sst_dsp_register_poll() result.
+ */
+static int sst_transfer_fw_host_dma(struct sst_dsp *ctx)
+{
+	struct device *dev = ctx->dev;
+	int status;
+
+	ctx->dsp_ops.trigger(dev, true, ctx->dsp_ops.stream_tag);
+
+	status = sst_dsp_register_poll(ctx, BXT_ADSP_FW_STATUS,
+				       SKL_FW_STS_MASK, BXT_ROM_INIT,
+				       BXT_BASEFW_TIMEOUT, "Firmware boot");
+
+	/* stop and release the stream whether or not the poll succeeded */
+	ctx->dsp_ops.trigger(dev, false, ctx->dsp_ops.stream_tag);
+	ctx->dsp_ops.cleanup(dev, &ctx->dmab, ctx->dsp_ops.stream_tag);
+
+	return status;
+}
+
+/*
+ * load_fw callback: request the firmware file named by ctx->fw_name, stage
+ * it (with one retry), transfer it over host DMA and wait for the firmware
+ * to signal boot completion.
+ *
+ * Returns 0 once boot_complete was observed and the DSP state was set to
+ * SKL_DSP_RUNNING; otherwise the negative error of the failing step, or
+ * -EIO when the boot wait times out. The firmware blob is released on every
+ * path.
+ */
+static int bxt_load_base_firmware(struct sst_dsp *ctx)
+{
+	const struct firmware *fw = NULL;
+	struct skl_sst *skl = ctx->thread_context;
+	int ret;
+
+	ret = request_firmware(&fw, ctx->fw_name, ctx->dev);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Request firmware failed %d\n", ret);
+		goto sst_load_base_firmware_failed;
+	}
+
+	ret = sst_bxt_prepare_fw(ctx, fw->data, fw->size);
+	/* Retry Enabling core and ROM load. Retry seemed to help */
+	if (ret < 0) {
+		ret = sst_bxt_prepare_fw(ctx, fw->data, fw->size);
+		if (ret < 0) {
+			dev_err(ctx->dev, "Core En/ROM load fail:%d\n", ret);
+			goto sst_load_base_firmware_failed;
+		}
+	}
+
+	ret = sst_transfer_fw_host_dma(ctx);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Transfer firmware failed %d\n", ret);
+		dev_info(ctx->dev, "Error code=0x%x: FW status=0x%x\n",
+			sst_dsp_shim_read(ctx, BXT_ADSP_ERROR_CODE),
+			sst_dsp_shim_read(ctx, BXT_ADSP_FW_STATUS));
+
+		skl_dsp_disable_core(ctx);
+	} else {
+		dev_dbg(ctx->dev, "Firmware download successful\n");
+		/* wait_event_timeout() yields 0 only when the timeout expired */
+		ret = wait_event_timeout(skl->boot_wait, skl->boot_complete,
+					msecs_to_jiffies(SKL_IPC_BOOT_MSECS));
+		if (ret == 0) {
+			dev_err(ctx->dev, "DSP boot fail, FW Ready timeout\n");
+			skl_dsp_disable_core(ctx);
+			ret = -EIO;
+		} else {
+			skl_dsp_set_state_locked(ctx, SKL_DSP_RUNNING);
+			ret = 0;
+		}
+	}
+
+sst_load_base_firmware_failed:
+	/* fw is still NULL if the request itself failed */
+	release_firmware(fw);
+	return ret;
+}
+
+/*
+ * set_state_D0 callback: enable the DSP core, enable the IPC interrupts and
+ * wait up to SKL_IPC_BOOT_MSECS for boot_complete to be set.
+ *
+ * Returns 0 after marking the DSP SKL_DSP_RUNNING, the negative
+ * skl_dsp_enable_core() error, or -EIO on boot timeout.
+ * NOTE(review): the timeout path leaves the core enabled, unlike the
+ * firmware load path in this file - confirm that this is intended.
+ */
+static int bxt_set_dsp_D0(struct sst_dsp *ctx)
+{
+	struct skl_sst *skl = ctx->thread_context;
+	int ret;
+
+	/* clear the flag before the core starts so a stale value is not seen */
+	skl->boot_complete = false;
+
+	ret = skl_dsp_enable_core(ctx);
+	if (ret < 0) {
+		dev_err(ctx->dev, "enable dsp core failed ret: %d\n", ret);
+		return ret;
+	}
+
+	/* enable interrupt */
+	skl_ipc_int_enable(ctx);
+	skl_ipc_op_int_enable(ctx);
+
+	ret = wait_event_timeout(skl->boot_wait, skl->boot_complete,
+					msecs_to_jiffies(SKL_IPC_BOOT_MSECS));
+	if (ret == 0) {
+		dev_err(ctx->dev, "ipc: error DSP boot timeout\n");
+		dev_err(ctx->dev, "Error code=0x%x: FW status=0x%x\n",
+			sst_dsp_shim_read(ctx, BXT_ADSP_ERROR_CODE),
+			sst_dsp_shim_read(ctx, BXT_ADSP_FW_STATUS));
+		return -EIO;
+	}
+
+	skl_dsp_set_state_locked(ctx, SKL_DSP_RUNNING);
+	return 0;
+}
+
+/*
+ * set_state_D3 callback: ask the base firmware to put core 0 into D3 over
+ * IPC, then disable the core and record the DSP as SKL_DSP_RESET.
+ *
+ * Returns 0 when the DSP is not running or the transition succeeded, the
+ * negative skl_ipc_set_dx() error, or -EIO when the core could not be
+ * disabled. The state is recorded as reset even when disabling fails.
+ */
+static int bxt_set_dsp_D3(struct sst_dsp *ctx)
+{
+	struct skl_ipc_dxstate_info dx;
+	struct skl_sst *skl = ctx->thread_context;
+	int ret;
+
+	if (!is_skl_dsp_running(ctx))
+		return 0;
+
+	dx.core_mask = SKL_DSP_CORE0_MASK;
+	dx.dx_mask = SKL_IPC_D3_MASK;
+
+	ret = skl_ipc_set_dx(&skl->ipc, SKL_INSTANCE_ID,
+				SKL_BASE_FW_MODULE_ID, &dx);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Failed to set DSP to D3 state: %d\n", ret);
+		return ret;
+	}
+
+	ret = skl_dsp_disable_core(ctx);
+	if (ret < 0) {
+		dev_err(ctx->dev, "disbale dsp core failed: %d\n", ret);
+		ret = -EIO;
+	} else {
+		ret = 0;
+	}
+
+	skl_dsp_set_state_locked(ctx, SKL_DSP_RESET);
+
+	/* report the disable failure instead of silently dropping it */
+	return ret;
+}
+
+/* Broxton firmware callbacks; copied by value into the DSP context at init. */
+static struct skl_dsp_fw_ops bxt_fw_ops = {
+	.set_state_D0 = bxt_set_dsp_D0,
+	.set_state_D3 = bxt_set_dsp_D3,
+	.load_fw = bxt_load_base_firmware,
+	.get_fw_errcode = bxt_get_errorcode,
+};
+
+/* Register and RAM accessors plus IRQ handler and free hook for the DSP. */
+static struct sst_ops skl_ops = {
+	.irq_handler = skl_dsp_sst_interrupt,
+	.write = sst_shim32_write,
+	.read = sst_shim32_read,
+	.ram_read = sst_memcpy_fromio_32,
+	.ram_write = sst_memcpy_toio_32,
+	.free = skl_dsp_free,
+};
+
+/*
+ * DSP device descriptor handed to skl_dsp_ctx_init(). thread_context is
+ * filled in at init time, so this static object is shared by every init call.
+ */
+static struct sst_dsp_device skl_dev = {
+	.thread = skl_dsp_irq_thread_handler,
+	.ops = &skl_ops,
+};
+
+/*
+ * bxt_sst_dsp_init - create the Broxton DSP context and boot the base firmware
+ * @dev:       device used for allocation, logging and the firmware request
+ * @mmio_base: mapped register window, used for both addr.lpe and addr.shim
+ * @irq:       interrupt number handed to skl_dsp_ctx_init()
+ * @fw_name:   firmware file name, stored in the DSP context for load_fw
+ * @dsp_ops:   loader callbacks, copied into the DSP context
+ * @dsp:       optional out-pointer receiving the new context on success
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, -ENODEV
+ * if skl_dsp_ctx_init() fails, or the error from skl_ipc_init() or from the
+ * firmware load.
+ */
+int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
+			const char *fw_name, struct skl_dsp_loader_ops dsp_ops,
+			struct skl_sst **dsp)
+{
+	struct skl_sst *skl;
+	struct sst_dsp *sst;
+	int ret;
+
+	skl = devm_kzalloc(dev, sizeof(*skl), GFP_KERNEL);
+	if (skl == NULL)
+		return -ENOMEM;
+
+	skl->dev = dev;
+	skl_dev.thread_context = skl;
+
+	skl->dsp = skl_dsp_ctx_init(dev, &skl_dev, irq);
+	if (!skl->dsp) {
+		dev_err(skl->dev, "skl_dsp_ctx_init failed\n");
+		return -ENODEV;
+	}
+
+	sst = skl->dsp;
+	sst->fw_name = fw_name;
+	sst->dsp_ops = dsp_ops;
+	sst->fw_ops = bxt_fw_ops;
+	sst->addr.lpe = mmio_base;
+	sst->addr.shim = mmio_base;
+
+	sst_dsp_mailbox_init(sst, (BXT_ADSP_SRAM0_BASE + SKL_ADSP_W0_STAT_SZ),
+			SKL_ADSP_W0_UP_SZ, BXT_ADSP_SRAM1_BASE, SKL_ADSP_W1_SZ);
+
+	ret = skl_ipc_init(dev, skl);
+	if (ret)
+		return ret;
+
+	/* the wait queue must exist before load_fw sleeps on boot_complete */
+	skl->boot_complete = false;
+	init_waitqueue_head(&skl->boot_wait);
+
+	ret = sst->fw_ops.load_fw(sst);
+	if (ret < 0) {
+		/* errno values are negative: print as decimal, end the line */
+		dev_err(dev, "Load base fw failed: %d\n", ret);
+		return ret;
+	}
+
+	if (dsp)
+		*dsp = skl;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bxt_sst_dsp_init);
+
+
+/*
+ * bxt_sst_dsp_cleanup - tear down a context created by bxt_sst_dsp_init()
+ * @dev: owning device (unused here)
+ * @ctx: context to release
+ *
+ * Frees the IPC state, runs the code-loader cleanup hook if one is
+ * installed, unmaps the register window and calls the DSP free hook.
+ */
+void bxt_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx)
+{
+	skl_ipc_free(&ctx->ipc);
+
+	/*
+	 * Nothing in the Broxton init path of this file installs the code
+	 * loader ops, so only call the hook when it has actually been set.
+	 */
+	if (ctx->dsp->cl_dev.ops.cl_cleanup_controller)
+		ctx->dsp->cl_dev.ops.cl_cleanup_controller(ctx->dsp);
+
+	if (ctx->dsp->addr.lpe)
+		iounmap(ctx->dsp->addr.lpe);
+
+	ctx->dsp->ops->free(ctx->dsp);
+}
+EXPORT_SYMBOL_GPL(bxt_sst_dsp_cleanup);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Broxton IPC driver");
diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c
index 79c5089..226db84 100644
--- a/sound/soc/intel/skylake/skl-messages.c
+++ b/sound/soc/intel/skylake/skl-messages.c
@@ -72,6 +72,105 @@
 	skl_ipc_set_large_config(&ctx->ipc, &msg, (u32 *)&mask);
 }
 
+/*
+ * Enable or disable SPIB for the playback stream identified by @stream_tag
+ * and program @size as its SPIB value.
+ * Returns 0, or -EINVAL when no stream carries that tag.
+ */
+static int skl_dsp_setup_spib(struct device *dev, unsigned int size,
+				int stream_tag, int enable)
+{
+	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
+	struct hdac_stream *hstream;
+
+	hstream = snd_hdac_get_stream(ebus_to_hbus(ebus),
+				      SNDRV_PCM_STREAM_PLAYBACK, stream_tag);
+	if (!hstream)
+		return -EINVAL;
+
+	/* enable/disable SPIB for this hdac stream */
+	snd_hdac_ext_stream_spbcap_enable(ebus, enable, hstream->index);
+
+	/* set the spib value */
+	snd_hdac_ext_stream_set_spib(ebus, stream_to_hdac_ext_stream(hstream),
+				     size);
+
+	return 0;
+}
+
+/*
+ * Loader "prepare" callback: assign a host playback stream, prepare it for
+ * a DSP transfer of @size bytes in @format backed by @dmab, and enable SPIB
+ * for it.
+ *
+ * Returns the stream tag of the assigned stream, -ENODEV when no bus or no
+ * free stream is available, or the negative snd_hdac_dsp_prepare() error.
+ * On that last failure the stream is handed back so it can be reused.
+ */
+static int skl_dsp_prepare(struct device *dev, unsigned int format,
+			unsigned int size, struct snd_dma_buffer *dmab)
+{
+	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
+	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_ext_stream *estream;
+	struct hdac_stream *stream;
+	struct snd_pcm_substream substream;
+	int ret;
+
+	if (!bus)
+		return -ENODEV;
+
+	/* dummy substream: only the direction is filled in for the assignment */
+	memset(&substream, 0, sizeof(substream));
+	substream.stream = SNDRV_PCM_STREAM_PLAYBACK;
+
+	estream = snd_hdac_ext_stream_assign(ebus, &substream,
+					HDAC_EXT_STREAM_TYPE_HOST);
+	if (!estream)
+		return -ENODEV;
+
+	stream = hdac_stream(estream);
+
+	/* assign decouple host dma channel */
+	ret = snd_hdac_dsp_prepare(stream, format, size, dmab);
+	if (ret < 0) {
+		/* do not leak the stream that was just assigned */
+		snd_hdac_ext_stream_release(estream, HDAC_EXT_STREAM_TYPE_HOST);
+		return ret;
+	}
+
+	skl_dsp_setup_spib(dev, size, stream->stream_tag, true);
+
+	return stream->stream_tag;
+}
+
+/*
+ * Loader "trigger" callback: start or stop the playback stream identified
+ * by @stream_tag.
+ * Returns 0, -ENODEV without a bus, or -EINVAL for an unknown tag.
+ */
+static int skl_dsp_trigger(struct device *dev, bool start, int stream_tag)
+{
+	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
+	struct hdac_bus *hbus = ebus_to_hbus(ebus);
+	struct hdac_stream *hstream;
+
+	if (!hbus)
+		return -ENODEV;
+
+	hstream = snd_hdac_get_stream(hbus, SNDRV_PCM_STREAM_PLAYBACK,
+				      stream_tag);
+	if (!hstream)
+		return -EINVAL;
+
+	snd_hdac_dsp_trigger(hstream, start);
+
+	return 0;
+}
+
+/*
+ * Loader "cleanup" callback: disable SPIB for the playback stream identified
+ * by @stream_tag, release the host stream and clean up the DSP transfer
+ * state backed by @dmab.
+ * Returns 0, -ENODEV without a bus, or -EINVAL for an unknown tag.
+ */
+static int skl_dsp_cleanup(struct device *dev,
+		struct snd_dma_buffer *dmab, int stream_tag)
+{
+	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
+	struct hdac_bus *hbus = ebus_to_hbus(ebus);
+	struct hdac_stream *hstream;
+
+	if (!hbus)
+		return -ENODEV;
+
+	hstream = snd_hdac_get_stream(hbus, SNDRV_PCM_STREAM_PLAYBACK,
+				      stream_tag);
+	if (!hstream)
+		return -EINVAL;
+
+	skl_dsp_setup_spib(dev, 0, stream_tag, false);
+	snd_hdac_ext_stream_release(stream_to_hdac_ext_stream(hstream),
+				    HDAC_EXT_STREAM_TYPE_HOST);
+
+	snd_hdac_dsp_cleanup(hstream, dmab);
+
+	return 0;
+}
+
 static struct skl_dsp_loader_ops skl_get_loader_ops(void)
 {
 	struct skl_dsp_loader_ops loader_ops;
@@ -84,6 +183,21 @@
 	return loader_ops;
 };
 
+static struct skl_dsp_loader_ops bxt_get_loader_ops(void)
+{
+	struct skl_dsp_loader_ops loader_ops;
+
+	memset(&loader_ops, 0, sizeof(loader_ops));
+
+	loader_ops.alloc_dma_buf = skl_alloc_dma_buf;
+	loader_ops.free_dma_buf = skl_free_dma_buf;
+	loader_ops.prepare = skl_dsp_prepare;
+	loader_ops.trigger = skl_dsp_trigger;
+	loader_ops.cleanup = skl_dsp_cleanup;
+
+	return loader_ops;
+};
+
 static const struct skl_dsp_ops dsp_ops[] = {
 	{
 		.id = 0x9d70,
@@ -91,6 +205,12 @@
 		.init = skl_sst_dsp_init,
 		.cleanup = skl_sst_dsp_cleanup
 	},
+	{
+		.id = 0x5a98,
+		.loader_ops = bxt_get_loader_ops,
+		.init = bxt_sst_dsp_init,
+		.cleanup = bxt_sst_dsp_cleanup
+	},
 };
 
 static int skl_get_dsp_ops(int pci_id)
@@ -744,7 +864,7 @@
 		return ret;
 	}
 	mconfig->m_state = SKL_MODULE_INIT_DONE;
-
+	kfree(param_data);
 	return ret;
 }
 
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index 14d1916e..7d73648 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -25,11 +25,12 @@
 
 #define DSDT_NHLT_PATH "\\_SB.PCI0.HDAS"
 
-void *skl_nhlt_init(struct device *dev)
+struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 {
 	acpi_handle handle;
 	union acpi_object *obj;
 	struct nhlt_resource_desc  *nhlt_ptr = NULL;
+	struct nhlt_acpi_table *nhlt_table = NULL;
 
 	if (ACPI_FAILURE(acpi_get_handle(NULL, DSDT_NHLT_PATH, &handle))) {
 		dev_err(dev, "Requested NHLT device not found\n");
@@ -39,18 +40,20 @@
 	obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL);
 	if (obj && obj->type == ACPI_TYPE_BUFFER) {
 		nhlt_ptr = (struct nhlt_resource_desc  *)obj->buffer.pointer;
-
-		return memremap(nhlt_ptr->min_addr, nhlt_ptr->length,
+		nhlt_table = (struct nhlt_acpi_table *)
+				memremap(nhlt_ptr->min_addr, nhlt_ptr->length,
 				MEMREMAP_WB);
+		ACPI_FREE(obj);
+		return nhlt_table;
 	}
 
 	dev_err(dev, "device specific method to extract NHLT blob failed\n");
 	return NULL;
 }
 
-void skl_nhlt_free(void *addr)
+void skl_nhlt_free(struct nhlt_acpi_table *nhlt)
 {
-	memunmap(addr);
+	memunmap((void *) nhlt);
 }
 
 static struct nhlt_specific_cfg *skl_get_specific_cfg(
@@ -120,7 +123,7 @@
 	struct hdac_bus *bus = ebus_to_hbus(&skl->ebus);
 	struct device *dev = bus->dev;
 	struct nhlt_specific_cfg *sp_config;
-	struct nhlt_acpi_table *nhlt = (struct nhlt_acpi_table *)skl->nhlt;
+	struct nhlt_acpi_table *nhlt = skl->nhlt;
 	u16 bps = (s_fmt == 16) ? 16 : 32;
 	u8 j;
 
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index dab0900..7c81b31 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -51,7 +51,7 @@
 	.rate_min =		8000,
 	.rate_max =		48000,
 	.channels_min =		1,
-	.channels_max =		HDA_QUAD,
+	.channels_max =		8,
 	.buffer_bytes_max =	AZX_MAX_BUF_SIZE,
 	.period_bytes_min =	128,
 	.period_bytes_max =	AZX_MAX_BUF_SIZE / 2,
@@ -213,7 +213,7 @@
 	struct skl_sst *ctx = skl->skl_sst;
 	struct skl_module_cfg *mconfig;
 
-	if ((dai->playback_active > 1) || (dai->capture_active > 1))
+	if (dai->playback_widget->power || dai->capture_widget->power)
 		return 0;
 
 	mconfig = skl_tplg_be_get_cpr_module(dai, substream->stream);
@@ -402,23 +402,33 @@
 	struct skl_module_cfg *mconfig;
 	struct hdac_ext_bus *ebus = get_bus_ctx(substream);
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
+	struct snd_soc_dapm_widget *w;
 	int ret;
 
 	mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream);
 	if (!mconfig)
 		return -EIO;
 
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		w = dai->playback_widget;
+	else
+		w = dai->capture_widget;
+
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_RESUME:
-		skl_pcm_prepare(substream, dai);
-		/*
-		 * enable DMA Resume enable bit for the stream, set the dpib
-		 * & lpib position to resune before starting the DMA
-		 */
-		snd_hdac_ext_stream_drsm_enable(ebus, true,
-					hdac_stream(stream)->index);
-		snd_hdac_ext_stream_set_dpibr(ebus, stream, stream->dpib);
-		snd_hdac_ext_stream_set_lpib(stream, stream->lpib);
+		if (!w->ignore_suspend) {
+			skl_pcm_prepare(substream, dai);
+			/*
+			 * enable DMA Resume enable bit for the stream, set the
+			 * dpib & lpib position to resume before starting the
+			 * DMA
+			 */
+			snd_hdac_ext_stream_drsm_enable(ebus, true,
+						hdac_stream(stream)->index);
+			snd_hdac_ext_stream_set_dpibr(ebus, stream,
+							stream->dpib);
+			snd_hdac_ext_stream_set_lpib(stream, stream->lpib);
+		}
 
 	case SNDRV_PCM_TRIGGER_START:
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
@@ -448,7 +458,7 @@
 			return ret;
 
 		ret = skl_decoupled_trigger(substream, cmd);
-		if (cmd == SNDRV_PCM_TRIGGER_SUSPEND) {
+		if ((cmd == SNDRV_PCM_TRIGGER_SUSPEND) && !w->ignore_suspend) {
 			/* save the dpib and lpib positions */
 			stream->dpib = readl(ebus->bus.remap_addr +
 					AZX_REG_VS_SDXDPIB_XBASE +
@@ -523,7 +533,6 @@
 	if (!link)
 		return -EINVAL;
 
-	snd_hdac_ext_bus_link_power_up(link);
 	snd_hdac_ext_link_stream_reset(link_dev);
 
 	snd_hdac_ext_link_stream_setup(link_dev, format_val);
@@ -682,7 +691,7 @@
 	.playback = {
 		.stream_name = "HDMI1 Playback",
 		.channels_min = HDA_STEREO,
-		.channels_max = HDA_STEREO,
+		.channels_max = 8,
 		.rates = SNDRV_PCM_RATE_32000 |	SNDRV_PCM_RATE_44100 |
 			SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |
 			SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 |
@@ -697,7 +706,7 @@
 	.playback = {
 		.stream_name = "HDMI2 Playback",
 		.channels_min = HDA_STEREO,
-		.channels_max = HDA_STEREO,
+		.channels_max = 8,
 		.rates = SNDRV_PCM_RATE_32000 |	SNDRV_PCM_RATE_44100 |
 			SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |
 			SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 |
@@ -712,7 +721,7 @@
 	.playback = {
 		.stream_name = "HDMI3 Playback",
 		.channels_min = HDA_STEREO,
-		.channels_max = HDA_STEREO,
+		.channels_max = 8,
 		.rates = SNDRV_PCM_RATE_32000 |	SNDRV_PCM_RATE_44100 |
 			SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |
 			SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 |
@@ -760,12 +769,84 @@
 	},
 },
 {
+	.name = "SSP2 Pin",
+	.ops = &skl_be_ssp_dai_ops,
+	.playback = {
+		.stream_name = "ssp2 Tx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+	.capture = {
+		.stream_name = "ssp2 Rx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+},
+{
+	.name = "SSP3 Pin",
+	.ops = &skl_be_ssp_dai_ops,
+	.playback = {
+		.stream_name = "ssp3 Tx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+	.capture = {
+		.stream_name = "ssp3 Rx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+},
+{
+	.name = "SSP4 Pin",
+	.ops = &skl_be_ssp_dai_ops,
+	.playback = {
+		.stream_name = "ssp4 Tx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+	.capture = {
+		.stream_name = "ssp4 Rx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+},
+{
+	.name = "SSP5 Pin",
+	.ops = &skl_be_ssp_dai_ops,
+	.playback = {
+		.stream_name = "ssp5 Tx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+	.capture = {
+		.stream_name = "ssp5 Rx",
+		.channels_min = HDA_STEREO,
+		.channels_max = HDA_STEREO,
+		.rates = SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	},
+},
+{
 	.name = "iDisp1 Pin",
 	.ops = &skl_link_dai_ops,
 	.playback = {
 		.stream_name = "iDisp1 Tx",
 		.channels_min = HDA_STEREO,
-		.channels_max = HDA_STEREO,
+		.channels_max = 8,
 		.rates = SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_16000|SNDRV_PCM_RATE_48000,
 		.formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE |
 			SNDRV_PCM_FMTBIT_S24_LE,
@@ -777,7 +858,7 @@
 	.playback = {
 		.stream_name = "iDisp2 Tx",
 		.channels_min = HDA_STEREO,
-		.channels_max = HDA_STEREO,
+		.channels_max = 8,
 		.rates = SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_16000|
 			SNDRV_PCM_RATE_48000,
 		.formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE |
@@ -790,7 +871,7 @@
 	.playback = {
 		.stream_name = "iDisp3 Tx",
 		.channels_min = HDA_STEREO,
-		.channels_max = HDA_STEREO,
+		.channels_max = 8,
 		.rates = SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_16000|
 			SNDRV_PCM_RATE_48000,
 		.formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE |
diff --git a/sound/soc/intel/skylake/skl-sst-dsp.c b/sound/soc/intel/skylake/skl-sst-dsp.c
index 2962ef2..13c1985 100644
--- a/sound/soc/intel/skylake/skl-sst-dsp.c
+++ b/sound/soc/intel/skylake/skl-sst-dsp.c
@@ -336,8 +336,6 @@
 	skl_ipc_int_disable(dsp);
 
 	free_irq(dsp->irq, dsp);
-	dsp->cl_dev.ops.cl_cleanup_controller(dsp);
-	skl_cldma_int_disable(dsp);
 	skl_ipc_op_int_disable(dsp);
 	skl_ipc_int_disable(dsp);
 
diff --git a/sound/soc/intel/skylake/skl-sst-dsp.h b/sound/soc/intel/skylake/skl-sst-dsp.h
index b6e310d..deabe73 100644
--- a/sound/soc/intel/skylake/skl-sst-dsp.h
+++ b/sound/soc/intel/skylake/skl-sst-dsp.h
@@ -118,16 +118,25 @@
 	int (*set_state_D0)(struct sst_dsp *ctx);
 	int (*set_state_D3)(struct sst_dsp *ctx);
 	unsigned int (*get_fw_errcode)(struct sst_dsp *ctx);
-	int (*load_mod)(struct sst_dsp *ctx, u16 mod_id, char *mod_name);
+	int (*load_mod)(struct sst_dsp *ctx, u16 mod_id, u8 *mod_name);
 	int (*unload_mod)(struct sst_dsp *ctx, u16 mod_id);
 
 };
 
 struct skl_dsp_loader_ops {
+	int stream_tag;
+
 	int (*alloc_dma_buf)(struct device *dev,
 		struct snd_dma_buffer *dmab, size_t size);
 	int (*free_dma_buf)(struct device *dev,
 		struct snd_dma_buffer *dmab);
+	int (*prepare)(struct device *dev, unsigned int format,
+				unsigned int byte_size,
+				struct snd_dma_buffer *bufp);
+	int (*trigger)(struct device *dev, bool start, int stream_tag);
+
+	int (*cleanup)(struct device *dev, struct snd_dma_buffer *dmab,
+				 int stream_tag);
 };
 
 struct skl_load_module_info {
@@ -160,6 +169,10 @@
 int skl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
 		const char *fw_name, struct skl_dsp_loader_ops dsp_ops,
 		struct skl_sst **dsp);
+int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
+		const char *fw_name, struct skl_dsp_loader_ops dsp_ops,
+		struct skl_sst **dsp);
 void skl_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx);
+void bxt_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx);
 
 #endif /*__SKL_SST_DSP_H__*/
diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c
index 348a734..13ec8d5 100644
--- a/sound/soc/intel/skylake/skl-sst.c
+++ b/sound/soc/intel/skylake/skl-sst.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/uuid.h>
 #include "../common/sst-dsp.h"
 #include "../common/sst-dsp-priv.h"
 #include "../common/sst-ipc.h"
@@ -304,14 +305,16 @@
 	return ret;
 }
 
-static int skl_load_module(struct sst_dsp *ctx, u16 mod_id, char *guid)
+static int skl_load_module(struct sst_dsp *ctx, u16 mod_id, u8 *guid)
 {
 	struct skl_module_table *module_entry = NULL;
 	int ret = 0;
 	char mod_name[64]; /* guid str = 32 chars + 4 hyphens */
+	uuid_le *uuid_mod;
 
-	snprintf(mod_name, sizeof(mod_name), "%s%s%s",
-			"intel/dsp_fw_", guid, ".bin");
+	uuid_mod = (uuid_le *)guid;
+	snprintf(mod_name, sizeof(mod_name), "%s%pUL%s",
+				"intel/dsp_fw_", uuid_mod, ".bin");
 
 	module_entry = skl_module_get_from_id(ctx, mod_id);
 	if (module_entry == NULL) {
@@ -451,6 +454,10 @@
 	skl_clear_module_table(ctx->dsp);
 	skl_ipc_free(&ctx->ipc);
 	ctx->dsp->ops->free(ctx->dsp);
+	if (ctx->boot_complete) {
+		ctx->dsp->cl_dev.ops.cl_cleanup_controller(ctx->dsp);
+		skl_cldma_int_disable(ctx->dsp);
+	}
 }
 EXPORT_SYMBOL_GPL(skl_sst_dsp_cleanup);
 
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index cdb78b7..3e036b0 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -154,13 +154,32 @@
 	dev_dbg(ctx->dev, "ch_cfg = %d\n", mcfg->out_fmt[0].ch_cfg);
 }
 
+static void skl_tplg_update_chmap(struct skl_module_fmt *fmt, int chs)
+{
+	int slot_map = 0xFFFFFFFF;
+	int start_slot = 0;
+	int i;
+
+	for (i = 0; i < chs; i++) {
+		/*
+		 * For 2 channels with starting slot as 0, slot map will
+		 * look like 0xFFFFFF10.
+		 */
+		slot_map &= (~(0xF << (4 * i)) | (start_slot << (4 * i)));
+		start_slot++;
+	}
+	fmt->ch_map = slot_map;
+}
+
 static void skl_tplg_update_params(struct skl_module_fmt *fmt,
 			struct skl_pipe_params *params, int fixup)
 {
 	if (fixup & SKL_RATE_FIXUP_MASK)
 		fmt->s_freq = params->s_freq;
-	if (fixup & SKL_CH_FIXUP_MASK)
+	if (fixup & SKL_CH_FIXUP_MASK) {
 		fmt->channels = params->ch;
+		skl_tplg_update_chmap(fmt, fmt->channels);
+	}
 	if (fixup & SKL_FMT_FIXUP_MASK) {
 		fmt->valid_bit_depth = skl_get_bit_depth(params->s_fmt);
 
@@ -1564,6 +1583,8 @@
 		return -ENOMEM;
 
 	w->priv = mconfig;
+	memcpy(&mconfig->guid, &dfw_config->uuid, 16);
+
 	mconfig->id.module_id = dfw_config->module_id;
 	mconfig->id.instance_id = dfw_config->instance_id;
 	mconfig->mcps = dfw_config->max_mcps;
@@ -1593,10 +1614,6 @@
 	mconfig->time_slot = dfw_config->time_slot;
 	mconfig->formats_config.caps_size = dfw_config->caps.caps_size;
 
-	if (dfw_config->is_loadable)
-		memcpy(mconfig->guid, dfw_config->uuid,
-					ARRAY_SIZE(dfw_config->uuid));
-
 	mconfig->m_in_pin = devm_kzalloc(bus->dev, (mconfig->max_in_queue) *
 						sizeof(*mconfig->m_in_pin),
 						GFP_KERNEL);
diff --git a/sound/soc/intel/skylake/skl-topology.h b/sound/soc/intel/skylake/skl-topology.h
index d2d9230..e4b399c 100644
--- a/sound/soc/intel/skylake/skl-topology.h
+++ b/sound/soc/intel/skylake/skl-topology.h
@@ -281,7 +281,7 @@
 };
 
 struct skl_module_cfg {
-	char guid[SKL_UUID_STR_SZ];
+	u8 guid[16];
 	struct skl_module_inst_id id;
 	u8 domain;
 	bool homogenous_inputs;
diff --git a/sound/soc/intel/skylake/skl-tplg-interface.h b/sound/soc/intel/skylake/skl-tplg-interface.h
index 1db88a6..a32e5e9 100644
--- a/sound/soc/intel/skylake/skl-tplg-interface.h
+++ b/sound/soc/intel/skylake/skl-tplg-interface.h
@@ -181,7 +181,7 @@
 } __packed;
 
 struct skl_dfw_module {
-	char uuid[SKL_UUID_STR_SZ];
+	u8 uuid[16];
 
 	u16 module_id;
 	u16 instance_id;
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 3982f55..06d8c26 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -229,7 +229,12 @@
 	 * running, we need to save the state for these and continue
 	 */
 	if (skl->supend_active) {
+		/* turn off the links and stop the CORB/RIRB DMA if it is On */
 		snd_hdac_ext_bus_link_power_down_all(ebus);
+
+		if (ebus->cmd_dma_state)
+			snd_hdac_bus_stop_cmd_io(&ebus->bus);
+
 		enable_irq_wake(bus->irq);
 		pci_save_state(pci);
 		pci_disable_device(pci);
@@ -255,6 +260,7 @@
 	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
 	struct skl *skl  = ebus_to_skl(ebus);
 	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_ext_link *hlink = NULL;
 	int ret;
 
 	/* Turned OFF in HDMI codec driver after codec reconfiguration */
@@ -276,8 +282,29 @@
 		ret = pci_enable_device(pci);
 		snd_hdac_ext_bus_link_power_up_all(ebus);
 		disable_irq_wake(bus->irq);
+		/*
+		 * turn On the links which were On before active suspend
+		 * and start the CORB/RIRB DMA if it was On before
+		 * active suspend.
+		 */
+		list_for_each_entry(hlink, &ebus->hlink_list, list) {
+			if (hlink->ref_count)
+				snd_hdac_ext_bus_link_power_up(hlink);
+		}
+
+		if (ebus->cmd_dma_state)
+			snd_hdac_bus_init_cmd_io(&ebus->bus);
 	} else {
 		ret = _skl_resume(ebus);
+
+		/* turn off the links which are off before suspend */
+		list_for_each_entry(hlink, &ebus->hlink_list, list) {
+			if (!hlink->ref_count)
+				snd_hdac_ext_bus_link_power_down(hlink);
+		}
+
+		if (!ebus->cmd_dma_state)
+			snd_hdac_bus_stop_cmd_io(&ebus->bus);
 	}
 
 	return ret;
@@ -613,6 +640,7 @@
 	struct skl *skl;
 	struct hdac_ext_bus *ebus = NULL;
 	struct hdac_bus *bus = NULL;
+	struct hdac_ext_link *hlink = NULL;
 	int err;
 
 	/* we use ext core ops, so provide NULL for ops here */
@@ -643,7 +671,7 @@
 		err = skl_machine_device_register(skl,
 				  (void *)pci_id->driver_data);
 		if (err < 0)
-			goto out_free;
+			goto out_nhlt_free;
 
 		err = skl_init_dsp(skl);
 		if (err < 0) {
@@ -679,6 +707,12 @@
 		}
 	}
 
+	/*
+	 * we are done probling so decrement link counts
+	 */
+	list_for_each_entry(hlink, &ebus->hlink_list, list)
+		snd_hdac_ext_bus_link_put(ebus, hlink);
+
 	/*configure PM */
 	pm_runtime_put_noidle(bus->dev);
 	pm_runtime_allow(bus->dev);
@@ -693,6 +727,8 @@
 	skl_free_dsp(skl);
 out_mach_free:
 	skl_machine_device_unregister(skl);
+out_nhlt_free:
+	skl_nhlt_free(skl->nhlt);
 out_free:
 	skl->init_failed = 1;
 	skl_free(ebus);
@@ -743,6 +779,7 @@
 	skl_free_dsp(skl);
 	skl_machine_device_unregister(skl);
 	skl_dmic_device_unregister(skl);
+	skl_nhlt_free(skl->nhlt);
 	skl_free(ebus);
 	dev_set_drvdata(&pci->dev, NULL);
 }
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index 39e16fa..4b4b387 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -66,7 +66,7 @@
 	struct platform_device *dmic_dev;
 	struct platform_device *i2s_dev;
 
-	void *nhlt; /* nhlt ptr */
+	struct nhlt_acpi_table *nhlt; /* nhlt ptr */
 	struct skl_sst *skl_sst; /* sst skl ctx */
 
 	struct skl_dsp_resource resource;
@@ -103,8 +103,8 @@
 int skl_platform_unregister(struct device *dev);
 int skl_platform_register(struct device *dev);
 
-void *skl_nhlt_init(struct device *dev);
-void skl_nhlt_free(void *addr);
+struct nhlt_acpi_table *skl_nhlt_init(struct device *dev);
+void skl_nhlt_free(struct nhlt_acpi_table *addr);
 struct nhlt_specific_cfg *skl_get_ep_blob(struct skl *skl, u32 instance,
 			u8 link_type, u8 s_fmt, u8 no_ch, u32 s_rate, u8 dirn);
 
diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c
index 2f8e204..574c6af 100644
--- a/sound/soc/rockchip/rockchip_i2s.c
+++ b/sound/soc/rockchip/rockchip_i2s.c
@@ -34,6 +34,13 @@
 
 	struct regmap *regmap;
 
+/*
+ * Used to indicate the tx/rx status.
+ * The I2S controller expects tx and rx to be started together,
+ * and to be stopped only once both have been asked to stop.
+*/
+	bool tx_start;
+	bool rx_start;
 	bool is_master_mode;
 };
 
@@ -75,29 +82,37 @@
 				   I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE);
 
 		regmap_update_bits(i2s->regmap, I2S_XFER,
-				   I2S_XFER_TXS_START,
-				   I2S_XFER_TXS_START);
+				   I2S_XFER_TXS_START | I2S_XFER_RXS_START,
+				   I2S_XFER_TXS_START | I2S_XFER_RXS_START);
+
+		i2s->tx_start = true;
 	} else {
+		i2s->tx_start = false;
+
 		regmap_update_bits(i2s->regmap, I2S_DMACR,
 				   I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE);
 
-		regmap_update_bits(i2s->regmap, I2S_XFER,
-				   I2S_XFER_TXS_START,
-				   I2S_XFER_TXS_STOP);
+		if (!i2s->rx_start) {
+			regmap_update_bits(i2s->regmap, I2S_XFER,
+					   I2S_XFER_TXS_START |
+					   I2S_XFER_RXS_START,
+					   I2S_XFER_TXS_STOP |
+					   I2S_XFER_RXS_STOP);
 
-		regmap_update_bits(i2s->regmap, I2S_CLR,
-				   I2S_CLR_TXC,
-				   I2S_CLR_TXC);
+			regmap_update_bits(i2s->regmap, I2S_CLR,
+					   I2S_CLR_TXC | I2S_CLR_RXC,
+					   I2S_CLR_TXC | I2S_CLR_RXC);
 
-		regmap_read(i2s->regmap, I2S_CLR, &val);
-
-		/* Should wait for clear operation to finish */
-		while (val & I2S_CLR_TXC) {
 			regmap_read(i2s->regmap, I2S_CLR, &val);
-			retry--;
-			if (!retry) {
-				dev_warn(i2s->dev, "fail to clear\n");
-				break;
+
+			/* Should wait for clear operation to finish */
+			while (val) {
+				regmap_read(i2s->regmap, I2S_CLR, &val);
+				retry--;
+				if (!retry) {
+					dev_warn(i2s->dev, "fail to clear\n");
+					break;
+				}
 			}
 		}
 	}
@@ -113,29 +128,37 @@
 				   I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_ENABLE);
 
 		regmap_update_bits(i2s->regmap, I2S_XFER,
-				   I2S_XFER_RXS_START,
-				   I2S_XFER_RXS_START);
+				   I2S_XFER_TXS_START | I2S_XFER_RXS_START,
+				   I2S_XFER_TXS_START | I2S_XFER_RXS_START);
+
+		i2s->rx_start = true;
 	} else {
+		i2s->rx_start = false;
+
 		regmap_update_bits(i2s->regmap, I2S_DMACR,
 				   I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_DISABLE);
 
-		regmap_update_bits(i2s->regmap, I2S_XFER,
-				   I2S_XFER_RXS_START,
-				   I2S_XFER_RXS_STOP);
+		if (!i2s->tx_start) {
+			regmap_update_bits(i2s->regmap, I2S_XFER,
+					   I2S_XFER_TXS_START |
+					   I2S_XFER_RXS_START,
+					   I2S_XFER_TXS_STOP |
+					   I2S_XFER_RXS_STOP);
 
-		regmap_update_bits(i2s->regmap, I2S_CLR,
-				   I2S_CLR_RXC,
-				   I2S_CLR_RXC);
+			regmap_update_bits(i2s->regmap, I2S_CLR,
+					   I2S_CLR_TXC | I2S_CLR_RXC,
+					   I2S_CLR_TXC | I2S_CLR_RXC);
 
-		regmap_read(i2s->regmap, I2S_CLR, &val);
-
-		/* Should wait for clear operation to finish */
-		while (val & I2S_CLR_RXC) {
 			regmap_read(i2s->regmap, I2S_CLR, &val);
-			retry--;
-			if (!retry) {
-				dev_warn(i2s->dev, "fail to clear\n");
-				break;
+
+			/* Should wait for clear operation to finish */
+			while (val) {
+				regmap_read(i2s->regmap, I2S_CLR, &val);
+				retry--;
+				if (!retry) {
+					dev_warn(i2s->dev, "fail to clear\n");
+					break;
+				}
 			}
 		}
 	}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index d2e62b15..16369ca 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -930,7 +930,18 @@
 	return NULL;
 }
 
-static struct snd_soc_dai *snd_soc_find_dai(
+/**
+ * snd_soc_find_dai - Find a registered DAI
+ *
+ * @dlc: name of the DAI and optional component info to match
+ *
+ * This function will search all registered components and their DAIs to
+ * find the DAI of the same name. The component's of_node and name
+ * should also match if being specified.
+ *
+ * Return: pointer of DAI, or NULL if not found.
+ */
+struct snd_soc_dai *snd_soc_find_dai(
 	const struct snd_soc_dai_link_component *dlc)
 {
 	struct snd_soc_component *component;
@@ -959,6 +970,7 @@
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(snd_soc_find_dai);
 
 static bool soc_is_dai_link_bound(struct snd_soc_card *card,
 		struct snd_soc_dai_link *dai_link)
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index 6fd1906..6cef397 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -163,31 +163,42 @@
 	}
 
 	/*
-	 * Prepare formats mask for valid/allowed sample types. If the dma does
-	 * not have support for the given physical word size, it needs to be
-	 * masked out so user space can not use the format which produces
-	 * corrupted audio.
-	 * In case the dma driver does not implement the slave_caps the default
-	 * assumption is that it supports 1, 2 and 4 bytes widths.
+	 * If SND_DMAENGINE_PCM_DAI_FLAG_PACK is set keep
+	 * hw.formats set to 0, meaning no restrictions are in place.
+	 * In this case it's the responsibility of the DAI driver to
+	 * provide the supported format information.
 	 */
-	for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) {
-		int bits = snd_pcm_format_physical_width(i);
+	if (!(dma_data->flags & SND_DMAENGINE_PCM_DAI_FLAG_PACK))
+		/*
+		 * Prepare formats mask for valid/allowed sample types. If the
+		 * dma does not have support for the given physical word size,
+		 * it needs to be masked out so user space can not use the
+		 * format which produces corrupted audio.
+		 * In case the dma driver does not implement the slave_caps the
+		 * default assumption is that it supports 1, 2 and 4 bytes
+		 * widths.
+		 */
+		for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) {
+			int bits = snd_pcm_format_physical_width(i);
 
-		/* Enable only samples with DMA supported physical widths */
-		switch (bits) {
-		case 8:
-		case 16:
-		case 24:
-		case 32:
-		case 64:
-			if (addr_widths & (1 << (bits / 8)))
-				hw.formats |= (1LL << i);
-			break;
-		default:
-			/* Unsupported types */
-			break;
+			/*
+			 * Enable only samples with DMA supported physical
+			 * widths
+			 */
+			switch (bits) {
+			case 8:
+			case 16:
+			case 24:
+			case 32:
+			case 64:
+				if (addr_widths & (1 << (bits / 8)))
+					hw.formats |= (1LL << i);
+				break;
+			default:
+				/* Unsupported types */
+				break;
+			}
 		}
-	}
 
 	return snd_soc_set_runtime_hwparams(substream, &hw);
 }
diff --git a/sound/usb/card.c b/sound/usb/card.c
index 3fc6358..69860da 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -350,6 +350,7 @@
 	case USB_SPEED_HIGH:
 	case USB_SPEED_WIRELESS:
 	case USB_SPEED_SUPER:
+	case USB_SPEED_SUPER_PLUS:
 		break;
 	default:
 		dev_err(&dev->dev, "unknown device speed %d\n", snd_usb_get_speed(dev));
@@ -450,6 +451,9 @@
 	case USB_SPEED_SUPER:
 		strlcat(card->longname, ", super speed", sizeof(card->longname));
 		break;
+	case USB_SPEED_SUPER_PLUS:
+		strlcat(card->longname, ", super speed plus", sizeof(card->longname));
+		break;
 	default:
 		break;
 	}
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 7ccbcaf..26dd5f2 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -309,6 +309,9 @@
 	 * support reading */
 	if (snd_usb_get_sample_rate_quirk(chip))
 		return 0;
+	/* firmware is likely buggy; stop retrying after repeated failures */
+	if (chip->sample_rate_read_error > 2)
+		return 0;
 
 	if ((err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC_GET_CUR,
 				   USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_IN,
@@ -316,6 +319,7 @@
 				   data, sizeof(data))) < 0) {
 		dev_err(&dev->dev, "%d:%d: cannot get freq at ep %#x\n",
 			iface, fmt->altsetting, ep);
+		chip->sample_rate_read_error++;
 		return 0; /* some devices don't support reading */
 	}
 
diff --git a/sound/usb/helper.c b/sound/usb/helper.c
index 51ed1ac..7712e2b 100644
--- a/sound/usb/helper.c
+++ b/sound/usb/helper.c
@@ -120,6 +120,7 @@
 	case USB_SPEED_HIGH:
 	case USB_SPEED_WIRELESS:
 	case USB_SPEED_SUPER:
+	case USB_SPEED_SUPER_PLUS:
 		if (get_endpoint(alts, 0)->bInterval >= 1 &&
 		    get_endpoint(alts, 0)->bInterval <= 4)
 			return get_endpoint(alts, 0)->bInterval - 1;
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 47de8af..7ba9292 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -911,6 +911,7 @@
 	switch (snd_usb_get_speed(ep->umidi->dev)) {
 	case USB_SPEED_HIGH:
 	case USB_SPEED_SUPER:
+	case USB_SPEED_SUPER_PLUS:
 		count = 1;
 		break;
 	default:
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 4f85757..2f8c388 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -45,6 +45,7 @@
 #include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/usb.h>
@@ -1378,6 +1379,71 @@
 	snd_usb_mixer_add_control(&cval->head, kctl);
 }
 
+static int parse_clock_source_unit(struct mixer_build *state, int unitid,
+				   void *_ftr)
+{
+	struct uac_clock_source_descriptor *hdr = _ftr;
+	struct usb_mixer_elem_info *cval;
+	struct snd_kcontrol *kctl;
+	char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+	int ret;
+
+	if (state->mixer->protocol != UAC_VERSION_2)
+		return -EINVAL;
+
+	if (hdr->bLength != sizeof(*hdr)) {
+		usb_audio_dbg(state->chip,
+			      "Bogus clock source descriptor length of %d, ignoring.\n",
+			      hdr->bLength);
+		return 0;
+	}
+
+	/*
+	 * The only property of this unit we are interested in is the
+	 * clock source validity. If that isn't readable, just bail out.
+	 */
+	if (!uac2_control_is_readable(hdr->bmControls,
+				      ilog2(UAC2_CS_CONTROL_CLOCK_VALID)))
+		return 0;
+
+	cval = kzalloc(sizeof(*cval), GFP_KERNEL);
+	if (!cval)
+		return -ENOMEM;
+
+	snd_usb_mixer_elem_init_std(&cval->head, state->mixer, hdr->bClockID);
+
+	cval->min = 0;
+	cval->max = 1;
+	cval->channels = 1;
+	cval->val_type = USB_MIXER_BOOLEAN;
+	cval->control = UAC2_CS_CONTROL_CLOCK_VALID;
+
+	if (uac2_control_is_writeable(hdr->bmControls,
+				      ilog2(UAC2_CS_CONTROL_CLOCK_VALID)))
+		kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval);
+	else {
+		cval->master_readonly = 1;
+		kctl = snd_ctl_new1(&usb_feature_unit_ctl_ro, cval);
+	}
+
+	if (!kctl) {
+		kfree(cval);
+		return -ENOMEM;
+	}
+
+	kctl->private_free = snd_usb_mixer_elem_free;
+	ret = snd_usb_copy_string_desc(state, hdr->iClockSource,
+				       name, sizeof(name));
+	if (ret > 0)
+		snprintf(kctl->id.name, sizeof(kctl->id.name),
+			 "%s Validity", name);
+	else
+		snprintf(kctl->id.name, sizeof(kctl->id.name),
+			 "Clock Source %d Validity", hdr->bClockID);
+
+	return snd_usb_mixer_add_control(&cval->head, kctl);
+}
+
 /*
  * parse a feature unit
  *
@@ -2126,10 +2192,11 @@
 
 	switch (p1[2]) {
 	case UAC_INPUT_TERMINAL:
-	case UAC2_CLOCK_SOURCE:
 		return 0; /* NOP */
 	case UAC_MIXER_UNIT:
 		return parse_audio_mixer_unit(state, unitid, p1);
+	case UAC2_CLOCK_SOURCE:
+		return parse_clock_source_unit(state, unitid, p1);
 	case UAC_SELECTOR_UNIT:
 	case UAC2_CLOCK_SELECTOR:
 		return parse_audio_selector_unit(state, unitid, p1);
@@ -2307,6 +2374,7 @@
 	__u8 unitid = (index >> 8) & 0xff;
 	__u8 control = (value >> 8) & 0xff;
 	__u8 channel = value & 0xff;
+	unsigned int count = 0;
 
 	if (channel >= MAX_CHANNELS) {
 		usb_audio_dbg(mixer->chip,
@@ -2315,6 +2383,12 @@
 		return;
 	}
 
+	for (list = mixer->id_elems[unitid]; list; list = list->next_id_elem)
+		count++;
+
+	if (count == 0)
+		return;
+
 	for (list = mixer->id_elems[unitid]; list; list = list->next_id_elem) {
 		struct usb_mixer_elem_info *info;
 
@@ -2322,7 +2396,7 @@
 			continue;
 
 		info = (struct usb_mixer_elem_info *)list;
-		if (info->control != control)
+		if (count > 1 && info->control != control)
 			continue;
 
 		switch (attribute) {
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index b665d85..4d5c89a 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -47,6 +47,7 @@
 	
 	int num_interfaces;
 	int num_suspended_intf;
+	int sample_rate_read_error;
 
 	struct list_head pcm_list;	/* list of pcm streams */
 	struct list_head ep_list;	/* list of audio-related endpoints */
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 150829d..7947e56 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -5,6 +5,7 @@
  * Test code for seccomp bpf.
  */
 
+#include <sys/types.h>
 #include <asm/siginfo.h>
 #define __have_siginfo_t 1
 #define __have_sigval_t 1
@@ -14,7 +15,6 @@
 #include <linux/filter.h>
 #include <sys/prctl.h>
 #include <sys/ptrace.h>
-#include <sys/types.h>
 #include <sys/user.h>
 #include <linux/prctl.h>
 #include <linux/ptrace.h>
@@ -1242,6 +1242,12 @@
 # define ARCH_REGS     s390_regs
 # define SYSCALL_NUM   gprs[2]
 # define SYSCALL_RET   gprs[2]
+#elif defined(__mips__)
+# define ARCH_REGS	struct pt_regs
+# define SYSCALL_NUM	regs[2]
+# define SYSCALL_SYSCALL_NUM regs[4]
+# define SYSCALL_RET	regs[2]
+# define SYSCALL_NUM_RET_SHARE_REG
 #else
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
@@ -1249,7 +1255,7 @@
 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  */
-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
 #define HAVE_GETREGS
 #endif
 
@@ -1273,6 +1279,10 @@
 	}
 #endif
 
+#if defined(__mips__)
+	if (regs.SYSCALL_NUM == __NR_O32_Linux)
+		return regs.SYSCALL_SYSCALL_NUM;
+#endif
 	return regs.SYSCALL_NUM;
 }
 
@@ -1297,6 +1307,13 @@
 	{
 		regs.SYSCALL_NUM = syscall;
 	}
+#elif defined(__mips__)
+	{
+		if (regs.SYSCALL_NUM == __NR_O32_Linux)
+			regs.SYSCALL_SYSCALL_NUM = syscall;
+		else
+			regs.SYSCALL_NUM = syscall;
+	}
 
 #elif defined(__arm__)
 # ifndef PTRACE_SET_SYSCALL
@@ -1327,7 +1344,11 @@
 
 	/* If syscall is skipped, change return value. */
 	if (syscall == -1)
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+		TH_LOG("Can't modify syscall return on this architecture");
+#else
 		regs.SYSCALL_RET = 1;
+#endif
 
 #ifdef HAVE_GETREGS
 	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
@@ -1465,8 +1486,13 @@
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
 	ASSERT_EQ(0, ret);
 
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+	/* gettid has been skipped */
+	EXPECT_EQ(-1, syscall(__NR_gettid));
+#else
 	/* gettid has been skipped and an altered return value stored. */
 	EXPECT_EQ(1, syscall(__NR_gettid));
+#endif
 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
 }
 
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 7a79b68..e5d6108 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -41,6 +41,9 @@
 config HAVE_KVM_ARCH_TLB_FLUSH_ALL
        bool
 
+config HAVE_KVM_INVALID_WAKEUPS
+       bool
+
 config KVM_GENERIC_DIRTYLOG_READ_PROTECT
        bool
 
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 9aaa35d..409db33 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -17,7 +17,6 @@
  */
 
 #include <linux/cpu.h>
-#include <linux/of_irq.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
@@ -438,45 +437,29 @@
 	.notifier_call = kvm_timer_cpu_notify,
 };
 
-static const struct of_device_id arch_timer_of_match[] = {
-	{ .compatible	= "arm,armv7-timer",	},
-	{ .compatible	= "arm,armv8-timer",	},
-	{},
-};
-
 int kvm_timer_hyp_init(void)
 {
-	struct device_node *np;
-	unsigned int ppi;
+	struct arch_timer_kvm_info *info;
 	int err;
 
-	timecounter = arch_timer_get_timecounter();
-	if (!timecounter)
-		return -ENODEV;
+	info = arch_timer_get_kvm_info();
+	timecounter = &info->timecounter;
 
-	np = of_find_matching_node(NULL, arch_timer_of_match);
-	if (!np) {
-		kvm_err("kvm_arch_timer: can't find DT node\n");
+	if (info->virtual_irq <= 0) {
+		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
+			info->virtual_irq);
 		return -ENODEV;
 	}
+	host_vtimer_irq = info->virtual_irq;
 
-	ppi = irq_of_parse_and_map(np, 2);
-	if (!ppi) {
-		kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	err = request_percpu_irq(ppi, kvm_arch_timer_handler,
+	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
 				 "kvm guest timer", kvm_get_running_vcpus());
 	if (err) {
 		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
-			ppi, err);
+			host_vtimer_irq, err);
 		goto out;
 	}
 
-	host_vtimer_irq = ppi;
-
 	err = __register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {
 		kvm_err("Cannot register timer CPU notifier\n");
@@ -489,14 +472,13 @@
 		goto out_free;
 	}
 
-	kvm_info("%s IRQ%d\n", np->name, ppi);
+	kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
 	on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
 
 	goto out;
 out_free:
-	free_percpu_irq(ppi, kvm_get_running_vcpus());
+	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
 out:
-	of_node_put(np);
 	return err;
 }
 
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 67ec334..7e826c9 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -20,9 +20,6 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 
 #include <linux/irqchip/arm-gic.h>
 
@@ -186,38 +183,39 @@
 }
 
 /**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
- * @node:	pointer to the DT node
- * @ops: 	address of a pointer to the GICv2 operations
- * @params:	address of a pointer to HW-specific parameters
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
+ * @gic_kvm_info:	pointer to the GIC description
+ * @ops:		address of a pointer to the GICv2 operations
+ * @params:		address of a pointer to HW-specific parameters
  *
  * Returns 0 if a GICv2 has been found, with the low level operations
  * in *ops and the HW parameters in *params. Returns an error code
  * otherwise.
  */
-int vgic_v2_probe(struct device_node *vgic_node,
-		  const struct vgic_ops **ops,
-		  const struct vgic_params **params)
+int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
+		   const struct vgic_ops **ops,
+		   const struct vgic_params **params)
 {
 	int ret;
-	struct resource vctrl_res;
-	struct resource vcpu_res;
 	struct vgic_params *vgic = &vgic_v2_params;
+	const struct resource *vctrl_res = &gic_kvm_info->vctrl;
+	const struct resource *vcpu_res = &gic_kvm_info->vcpu;
 
-	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic->maint_irq) {
-		kvm_err("error getting vgic maintenance irq from DT\n");
+	if (!gic_kvm_info->maint_irq) {
+		kvm_err("error getting vgic maintenance irq\n");
+		ret = -ENXIO;
+		goto out;
+	}
+	vgic->maint_irq = gic_kvm_info->maint_irq;
+
+	if (!gic_kvm_info->vctrl.start) {
+		kvm_err("GICH not present in the firmware table\n");
 		ret = -ENXIO;
 		goto out;
 	}
 
-	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
-	if (ret) {
-		kvm_err("Cannot obtain GICH resource\n");
-		goto out;
-	}
-
-	vgic->vctrl_base = of_iomap(vgic_node, 2);
+	vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start,
+				   resource_size(&gic_kvm_info->vctrl));
 	if (!vgic->vctrl_base) {
 		kvm_err("Cannot ioremap GICH\n");
 		ret = -ENOMEM;
@@ -228,29 +226,23 @@
 	vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
 
 	ret = create_hyp_io_mappings(vgic->vctrl_base,
-				     vgic->vctrl_base + resource_size(&vctrl_res),
-				     vctrl_res.start);
+				     vgic->vctrl_base + resource_size(vctrl_res),
+				     vctrl_res->start);
 	if (ret) {
 		kvm_err("Cannot map VCTRL into hyp\n");
 		goto out_unmap;
 	}
 
-	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
-		kvm_err("Cannot obtain GICV resource\n");
-		ret = -ENXIO;
-		goto out_unmap;
-	}
-
-	if (!PAGE_ALIGNED(vcpu_res.start)) {
+	if (!PAGE_ALIGNED(vcpu_res->start)) {
 		kvm_err("GICV physical address 0x%llx not page aligned\n",
-			(unsigned long long)vcpu_res.start);
+			(unsigned long long)vcpu_res->start);
 		ret = -ENXIO;
 		goto out_unmap;
 	}
 
-	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+	if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
 		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-			(unsigned long long)resource_size(&vcpu_res),
+			(unsigned long long)resource_size(vcpu_res),
 			PAGE_SIZE);
 		ret = -ENXIO;
 		goto out_unmap;
@@ -259,10 +251,10 @@
 	vgic->can_emulate_gicv2 = true;
 	kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
 
-	vgic->vcpu_base = vcpu_res.start;
+	vgic->vcpu_base = vcpu_res->start;
 
-	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vctrl_res.start, vgic->maint_irq);
+	kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
+		 gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq);
 
 	vgic->type = VGIC_V2;
 	vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
@@ -276,6 +268,5 @@
 out_unmap:
 	iounmap(vgic->vctrl_base);
 out:
-	of_node_put(vgic_node);
 	return ret;
 }
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 999bdc6..c02a1b1 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -20,11 +20,9 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 
 #include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irqchip/arm-gic-common.h>
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
@@ -222,30 +220,24 @@
 }
 
 /**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node:	pointer to the DT node
- * @ops: 	address of a pointer to the GICv3 operations
- * @params:	address of a pointer to HW-specific parameters
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
+ * @gic_kvm_info:	pointer to the GIC description
+ * @ops:		address of a pointer to the GICv3 operations
+ * @params:		address of a pointer to HW-specific parameters
  *
  * Returns 0 if a GICv3 has been found, with the low level operations
  * in *ops and the HW parameters in *params. Returns an error code
  * otherwise.
  */
-int vgic_v3_probe(struct device_node *vgic_node,
+int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
 		  const struct vgic_ops **ops,
 		  const struct vgic_params **params)
 {
 	int ret = 0;
-	u32 gicv_idx;
-	struct resource vcpu_res;
 	struct vgic_params *vgic = &vgic_v3_params;
+	const struct resource *vcpu_res = &gic_kvm_info->vcpu;
 
-	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic->maint_irq) {
-		kvm_err("error getting vgic maintenance irq from DT\n");
-		ret = -ENXIO;
-		goto out;
-	}
+	vgic->maint_irq = gic_kvm_info->maint_irq;
 
 	ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
 
@@ -256,24 +248,19 @@
 	vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
 	vgic->can_emulate_gicv2 = false;
 
-	if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
-		gicv_idx = 1;
-
-	gicv_idx += 3; /* Also skip GICD, GICC, GICH */
-	if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+	if (!vcpu_res->start) {
 		kvm_info("GICv3: no GICV resource entry\n");
 		vgic->vcpu_base = 0;
-	} else if (!PAGE_ALIGNED(vcpu_res.start)) {
+	} else if (!PAGE_ALIGNED(vcpu_res->start)) {
 		pr_warn("GICV physical address 0x%llx not page aligned\n",
-			(unsigned long long)vcpu_res.start);
+			(unsigned long long)vcpu_res->start);
 		vgic->vcpu_base = 0;
-	} else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+	} else if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
 		pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-			(unsigned long long)resource_size(&vcpu_res),
+			(unsigned long long)resource_size(vcpu_res),
 			PAGE_SIZE);
-		vgic->vcpu_base = 0;
 	} else {
-		vgic->vcpu_base = vcpu_res.start;
+		vgic->vcpu_base = vcpu_res->start;
 		vgic->can_emulate_gicv2 = true;
 		kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
 					KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -286,15 +273,13 @@
 	vgic->type = VGIC_V3;
 	vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
 
-	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vcpu_res.start, vgic->maint_irq);
+	kvm_info("GICV base=0x%llx, IRQ=%d\n",
+		 vgic->vcpu_base, vgic->maint_irq);
 
 	on_each_cpu(vgic_cpu_init_lrs, vgic, 1);
 
 	*ops = &vgic_v3_ops;
 	*params = vgic;
 
-out:
-	of_node_put(vgic_node);
 	return ret;
 }
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 00429b3..60668a7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -21,9 +21,7 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
+#include <linux/irq.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
 
@@ -33,6 +31,7 @@
 #include <trace/events/kvm.h>
 #include <asm/kvm.h>
 #include <kvm/iodev.h>
+#include <linux/irqchip/arm-gic-common.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -2389,33 +2388,38 @@
 	.notifier_call = vgic_cpu_notify,
 };
 
-static const struct of_device_id vgic_ids[] = {
-	{ .compatible = "arm,cortex-a15-gic",	.data = vgic_v2_probe, },
-	{ .compatible = "arm,cortex-a7-gic",	.data = vgic_v2_probe, },
-	{ .compatible = "arm,gic-400",		.data = vgic_v2_probe, },
-	{ .compatible = "arm,gic-v3",		.data = vgic_v3_probe, },
-	{},
-};
+static int kvm_vgic_probe(void)
+{
+	const struct gic_kvm_info *gic_kvm_info;
+	int ret;
+
+	gic_kvm_info = gic_get_kvm_info();
+	if (!gic_kvm_info)
+		return -ENODEV;
+
+	switch (gic_kvm_info->type) {
+	case GIC_V2:
+		ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic);
+		break;
+	case GIC_V3:
+		ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic);
+		break;
+	default:
+		ret = -ENODEV;
+	}
+
+	return ret;
+}
 
 int kvm_vgic_hyp_init(void)
 {
-	const struct of_device_id *matched_id;
-	const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-				const struct vgic_params **);
-	struct device_node *vgic_node;
 	int ret;
 
-	vgic_node = of_find_matching_node_and_match(NULL,
-						    vgic_ids, &matched_id);
-	if (!vgic_node) {
-		kvm_err("error: no compatible GIC node found\n");
-		return -ENODEV;
-	}
-
-	vgic_probe = matched_id->data;
-	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
-	if (ret)
+	ret = kvm_vgic_probe();
+	if (ret) {
+		kvm_err("error: KVM vGIC probing failed\n");
 		return ret;
+	}
 
 	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 46dbc0a..e469b60 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -408,15 +408,17 @@
 	 */
 	fdput(f);
 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
-	irqfd->consumer.token = (void *)irqfd->eventfd;
-	irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
-	irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
-	irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
-	irqfd->consumer.start = kvm_arch_irq_bypass_start;
-	ret = irq_bypass_register_consumer(&irqfd->consumer);
-	if (ret)
-		pr_info("irq bypass consumer (token %p) registration fails: %d\n",
+	if (kvm_arch_has_irq_bypass()) {
+		irqfd->consumer.token = (void *)irqfd->eventfd;
+		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+		irqfd->consumer.start = kvm_arch_irq_bypass_start;
+		ret = irq_bypass_register_consumer(&irqfd->consumer);
+		if (ret)
+			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
 				irqfd->consumer.token, ret);
+	}
 #endif
 
 	return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fd482f..dd4ac9d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2028,6 +2028,8 @@
 			 */
 			if (kvm_vcpu_check_block(vcpu) < 0) {
 				++vcpu->stat.halt_successful_poll;
+				if (!vcpu_valid_wakeup(vcpu))
+					++vcpu->stat.halt_poll_invalid;
 				goto out;
 			}
 			cur = ktime_get();
@@ -2053,7 +2055,9 @@
 out:
 	block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
 
-	if (halt_poll_ns) {
+	if (!vcpu_valid_wakeup(vcpu))
+		shrink_halt_poll_ns(vcpu);
+	else if (halt_poll_ns) {
 		if (block_ns <= vcpu->halt_poll_ns)
 			;
 		/* we had a long block, shrink polling */
@@ -2066,18 +2070,14 @@
 	} else
 		vcpu->halt_poll_ns = 0;
 
-	trace_kvm_vcpu_wakeup(block_ns, waited);
+	trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
+	kvm_arch_vcpu_block_finish(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 #ifndef CONFIG_S390
-/*
- * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
- */
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 {
-	int me;
-	int cpu = vcpu->cpu;
 	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
@@ -2086,6 +2086,18 @@
 		++vcpu->stat.halt_wakeup;
 	}
 
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
+
+/*
+ * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
+ */
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+
+	kvm_vcpu_wake_up(vcpu);
 	me = get_cpu();
 	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
 		if (kvm_arch_vcpu_should_kick(vcpu))
@@ -2272,7 +2284,7 @@
 	int r;
 	struct kvm_vcpu *vcpu;
 
-	if (id >= KVM_MAX_VCPUS)
+	if (id >= KVM_MAX_VCPU_ID)
 		return -EINVAL;
 
 	vcpu = kvm_arch_vcpu_create(kvm, id);
@@ -2746,6 +2758,8 @@
 	case KVM_CAP_MULTI_ADDRESS_SPACE:
 		return KVM_ADDRESS_SPACE_NUM;
 #endif
+	case KVM_CAP_MAX_VCPU_ID:
+		return KVM_MAX_VCPU_ID;
 	default:
 		break;
 	}
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 09a03b5..52abac4 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -89,6 +89,9 @@
 	struct irq_bypass_producer *tmp;
 	struct irq_bypass_consumer *consumer;
 
+	if (!producer->token)
+		return -EINVAL;
+
 	might_sleep();
 
 	if (!try_module_get(THIS_MODULE))
@@ -136,6 +139,9 @@
 	struct irq_bypass_producer *tmp;
 	struct irq_bypass_consumer *consumer;
 
+	if (!producer->token)
+		return;
+
 	might_sleep();
 
 	if (!try_module_get(THIS_MODULE))
@@ -177,7 +183,8 @@
 	struct irq_bypass_consumer *tmp;
 	struct irq_bypass_producer *producer;
 
-	if (!consumer->add_producer || !consumer->del_producer)
+	if (!consumer->token ||
+	    !consumer->add_producer || !consumer->del_producer)
 		return -EINVAL;
 
 	might_sleep();
@@ -227,6 +234,9 @@
 	struct irq_bypass_consumer *tmp;
 	struct irq_bypass_producer *producer;
 
+	if (!consumer->token)
+		return;
+
 	might_sleep();
 
 	if (!try_module_get(THIS_MODULE))